diff --git a/services/web/frontend/extracted-translations.json b/services/web/frontend/extracted-translations.json
index 5b44bbebfc..c6e3a139cc 100644
--- a/services/web/frontend/extracted-translations.json
+++ b/services/web/frontend/extracted-translations.json
@@ -1692,6 +1692,7 @@
   "search_command_replace": "",
   "search_in_all_projects": "",
   "search_in_archived_projects": "",
+  "search_in_library": "",
   "search_in_shared_projects": "",
   "search_in_trashed_projects": "",
   "search_in_your_projects": "",
diff --git a/services/web/locales/en.json b/services/web/locales/en.json
index 31f7ee6edb..9676f038fa 100644
--- a/services/web/locales/en.json
+++ b/services/web/locales/en.json
@@ -2229,6 +2229,7 @@
   "search_command_replace": "Replace",
   "search_in_all_projects": "Search in all projects",
   "search_in_archived_projects": "Search in archived projects",
+  "search_in_library": "Search in your library",
   "search_in_shared_projects": "Search in projects shared with you",
   "search_in_trashed_projects": "Search in trashed projects",
   "search_in_your_projects": "Search in your projects",
diff --git a/services/web/scripts/backfill_library_references_search.mjs b/services/web/scripts/backfill_library_references_search.mjs
new file mode 100644
index 0000000000..8733c2163d
--- /dev/null
+++ b/services/web/scripts/backfill_library_references_search.mjs
@@ -0,0 +1,180 @@
+// @ts-check
+import minimist from 'minimist'
+import logger from '@overleaf/logger'
+import { db } from '../app/src/infrastructure/mongodb.mjs'
+import { buildSearchFields } from '../modules/library/app/src/LibraryReferenceRepository.mts'
+import { scriptRunner } from './lib/ScriptRunner.mjs'
+
+/** @typedef {import('mongodb').AnyBulkWriteOperation} AnyBulkWriteOperation */
+
+// Key pattern of the index every query below is hinted to use. It matches the
+// userId_1_searchKey_1 index created by the
+// 20260507120000_create_libraryReferences_search_fields migration.
+const SEARCH_KEY_INDEX = { userId: 1, searchKey: 1 }
+
+const argv = minimist(process.argv.slice(2), {
+  boolean: ['commit', 'rollback', 'all', 'help'],
+  default: { 'batch-size': 1000 },
+})
+
+/** Logs the command-line help text. */
+function usage() {
+  logger.info(
+    {},
+    `Usage: node backfill_library_references_search.mjs [options]
+
+Populates searchKey and fields.searchValue on libraryReferences so the
+account-level library search can index them. Safe to rerun; picks up only
+un-indexed rows by default.
+
+Options:
+  --commit          Apply changes. Without this, runs as a dry run.
+  --rollback        Unset searchKey and fields.searchValue on all rows that
+                    have them. Mirrors the original migration's rollback.
+  --all             Re-index every row, not just rows where searchKey is null.
+                    Use when the tokenization format has changed.
+  --batch-size <n>  bulkWrite batch size; positive integer (default 1000).
+  --help            Show this message.
+`
+  )
+}
+
+if (argv.help) {
+  usage()
+  process.exit(0)
+}
+
+const BATCH_SIZE = Number(argv['batch-size'])
+// Reject NaN, zero, negative and fractional sizes up front: `ops.length >= NaN`
+// is never true, so a mistyped value would otherwise buffer every operation
+// in memory and send them all in a single bulkWrite.
+if (!Number.isInteger(BATCH_SIZE) || BATCH_SIZE < 1) {
+  logger.error(
+    { batchSize: argv['batch-size'] },
+    '--batch-size must be a positive integer'
+  )
+  process.exit(1)
+}
+
+/**
+ * Recomputes searchKey and fields for library references and queues one
+ * updateOne per row, sent in unordered bulkWrite batches of BATCH_SIZE.
+ * Without --commit nothing is written; rows are only counted.
+ *
+ * @param {(message: string) => Promise<void>} trackProgress
+ */
+async function backfill(trackProgress) {
+  // `{ searchKey: null }` matches rows where searchKey is null or missing;
+  // --all selects every row instead.
+  const filter = argv.all ? {} : { searchKey: null }
+  const cursor = db.libraryReferences
+    .find(filter)
+    .hint(SEARCH_KEY_INDEX)
+    .project({ key: 1, fields: 1 })
+
+  let processed = 0
+  /** @type {AnyBulkWriteOperation[]} */
+  let ops = []
+
+  // Sends the queued operations (only with --commit), reports the running
+  // total and empties the queue.
+  const flush = async () => {
+    if (ops.length === 0) return
+    if (argv.commit) {
+      await db.libraryReferences.bulkWrite(ops, { ordered: false })
+    }
+    processed += ops.length
+    await trackProgress(
+      `${argv.commit ? 'wrote' : '[dry-run]'} ${processed} docs`
+    )
+    ops = []
+  }
+
+  for await (const doc of cursor) {
+    const { searchKey, fields } = buildSearchFields({
+      key: doc.key,
+      // Rows without a fields array are treated as having no fields.
+      fields: (doc.fields ?? []).map(
+        (/** @type {{ name: string; editableValue?: string }} */ f) => ({
+          name: f.name,
+          editableValue: f.editableValue ?? '',
+        })
+      ),
+    })
+    ops.push({
+      updateOne: {
+        filter: { _id: doc._id },
+        // NOTE(review): this overwrites the whole fields array with what
+        // buildSearchFields returns for { name, editableValue } inputs;
+        // confirm no other per-field property is dropped.
+        update: { $set: { searchKey, fields } },
+      },
+    })
+    if (ops.length >= BATCH_SIZE) {
+      await flush()
+    }
+  }
+  await flush()
+  await trackProgress(`done; processed ${processed} docs`)
+}
+
+/**
+ * Removes searchKey and fields.searchValue from every row with a non-null
+ * searchKey. Without --commit only the number of such rows is reported.
+ *
+ * @param {(message: string) => Promise<void>} trackProgress
+ */
+async function rollback(trackProgress) {
+  const indexed = { searchKey: { $ne: null } }
+  if (!argv.commit) {
+    const count = await db.libraryReferences.countDocuments(indexed)
+    await trackProgress(`[dry-run] would unset search fields on ${count} docs`)
+    return
+  }
+  // The all-positional operator `$[]` fails the update when the array path is
+  // absent from a matched document, which would abort updateMany part-way.
+  // Rows whose fields is not an array are therefore handled separately and
+  // only lose searchKey.
+  const withFields = await db.libraryReferences.updateMany(
+    { ...indexed, fields: { $type: 'array' } },
+    { $unset: { searchKey: 1, 'fields.$[].searchValue': 1 } },
+    { hint: SEARCH_KEY_INDEX }
+  )
+  const withoutFields = await db.libraryReferences.updateMany(
+    { ...indexed, fields: { $not: { $type: 'array' } } },
+    { $unset: { searchKey: 1 } },
+    { hint: SEARCH_KEY_INDEX }
+  )
+  const modified = withFields.modifiedCount + withoutFields.modifiedCount
+  await trackProgress(`unset search fields on ${modified} docs`)
+}
+
+/**
+ * Script entry point: announces dry-run mode, then runs the rollback when
+ * --rollback is set and the backfill otherwise.
+ *
+ * @param {(message: string) => Promise<void>} trackProgress
+ */
+async function main(trackProgress) {
+  if (!argv.commit) {
+    await trackProgress('DRY RUN. Pass --commit to apply changes.')
+  }
+  if (argv.rollback) {
+    await rollback(trackProgress)
+  } else {
+    await backfill(trackProgress)
+  }
+}
+
+try {
+  await scriptRunner(main, {
+    commit: Boolean(argv.commit),
+    rollback: Boolean(argv.rollback),
+    all: Boolean(argv.all),
+    batchSize: BATCH_SIZE,
+  })
+  process.exit(0)
+} catch (err) {
+  logger.error({ err }, 'backfill failed')
+  process.exit(1)
+}
diff --git a/tools/migrations/20260507120000_create_libraryReferences_search_fields.mjs b/tools/migrations/20260507120000_create_libraryReferences_search_fields.mjs
new file mode 100644
index 0000000000..9ef1a5df45
--- /dev/null
+++ b/tools/migrations/20260507120000_create_libraryReferences_search_fields.mjs
@@ -0,0 +1,36 @@
+import Helpers from './lib/helpers.mjs'
+
+const tags = ['saas']
+
+// Indexes on the search fields of libraryReferences. Both use a strength-1
+// 'en' collation, so string comparisons on them ignore case and diacritics.
+const indexes = [
+  {
+    key: { userId: 1, searchKey: 1 },
+    name: 'userId_1_searchKey_1',
+    collation: { locale: 'en', strength: 1 },
+  },
+  {
+    key: { userId: 1, 'fields.searchValue': 1, 'fields.name': 1 },
+    name: 'userId_1_fields.searchValue_1_fields.name_1',
+    collation: { locale: 'en', strength: 1 },
+  },
+]
+
+// Creates the search indexes on libraryReferences.
+const migrate = async client => {
+  const { db } = client
+  await Helpers.addIndexesToCollection(db.libraryReferences, indexes)
+}
+
+// Drops the indexes created by migrate.
+const rollback = async client => {
+  const { db } = client
+  await Helpers.dropIndexesFromCollection(db.libraryReferences, indexes)
+}
+
+export default {
+  tags,
+  migrate,
+  rollback,
+}