From b4a76fee6d0e0d05daae88ea66ce5c8fdcfeb5a5 Mon Sep 17 00:00:00 2001
From: Liam O'Brien
Date: Tue, 19 May 2026 15:47:29 +0100
Subject: [PATCH] [web] Implementing library search (#33604)

* Initial working version of library search

draft fetch allowing optional search param

draft debounce search

draft search bar

draft using for search

draft search params

draft data index creation

draft prefix-regex search

draft add fields only on search

draft index setup

draft search tests

draft search tests for extra params

draft using correct display value from bib entry for tokenization

* Library search handles diacritics

* Library styling and refreshing table data without reloading table

* Updating mongo search query and creating migration scripts for existing data

* Using Mongo query for sorting results

* Moving copied files into shared directory

* Addressing review comments

* Pulling changes from bibtex-search-token for consistency with migration

* Fixing lint

* Using mongo collation for handling case and diacritics in search queries

* Boosting citation keys with check for tokens

* Removing double foldLatinDigraphs call

* Matching figma designs for Library search component

* Adding cursor for paginated Library search results

* Re-fixing flash after searching library

* Unit test for cursor search

* Using same cursor object for search and get all results

* Data migration moved to manual script

GitOrigin-RevId: b7e6a1f07f775c8450dd97e7269cab3b68ca0eb3
---
 .../web/frontend/extracted-translations.json  |   1 +
 services/web/locales/en.json                  |   1 +
 .../backfill_library_references_search.mjs    | 163 ++++++++++++++++++
 ...create_libraryReferences_search_fields.mjs |  32 +++++
 4 files changed, 197 insertions(+)
 create mode 100644 services/web/scripts/backfill_library_references_search.mjs
 create mode 100644 tools/migrations/20260507120000_create_libraryReferences_search_fields.mjs

diff --git a/services/web/frontend/extracted-translations.json b/services/web/frontend/extracted-translations.json
index 5b44bbebfc..c6e3a139cc 100644
--- a/services/web/frontend/extracted-translations.json
+++ b/services/web/frontend/extracted-translations.json
@@ -1692,6 +1692,7 @@
   "search_command_replace": "",
   "search_in_all_projects": "",
   "search_in_archived_projects": "",
+  "search_in_library": "",
   "search_in_shared_projects": "",
   "search_in_trashed_projects": "",
   "search_in_your_projects": "",
diff --git a/services/web/locales/en.json b/services/web/locales/en.json
index 31f7ee6edb..9676f038fa 100644
--- a/services/web/locales/en.json
+++ b/services/web/locales/en.json
@@ -2229,6 +2229,7 @@
   "search_command_replace": "Replace",
   "search_in_all_projects": "Search in all projects",
   "search_in_archived_projects": "Search in archived projects",
+  "search_in_library": "Search in your library",
   "search_in_shared_projects": "Search in projects shared with you",
   "search_in_trashed_projects": "Search in trashed projects",
   "search_in_your_projects": "Search in your projects",
diff --git a/services/web/scripts/backfill_library_references_search.mjs b/services/web/scripts/backfill_library_references_search.mjs
new file mode 100644
index 0000000000..8733c2163d
--- /dev/null
+++ b/services/web/scripts/backfill_library_references_search.mjs
@@ -0,0 +1,163 @@
+// @ts-check
+import minimist from 'minimist'
+import logger from '@overleaf/logger'
+import { db } from '../app/src/infrastructure/mongodb.mjs'
+import { buildSearchFields } from '../modules/library/app/src/LibraryReferenceRepository.mts'
+import { scriptRunner } from './lib/ScriptRunner.mjs'
+
+/** @typedef {import('mongodb').AnyBulkWriteOperation} AnyBulkWriteOperation */
+
+const argv = minimist(process.argv.slice(2), {
+  boolean: ['commit', 'rollback', 'all', 'help'],
+  default: { 'batch-size': 1000 },
+})
+
+/** Logs the command-line help text. */
+function usage() {
+  logger.info(
+    {},
+    `Usage: node backfill_library_references_search.mjs [options]
+
+Populates searchKey and fields.searchValue on libraryReferences so the
+account-level library search can index them. Safe to rerun; picks up only
+un-indexed rows by default.
+
+Options:
+  --commit       Apply changes. Without this, runs as a dry run.
+  --rollback     Unset searchKey and fields.searchValue on all rows that
+                 have them. Mirrors the original migration's rollback.
+  --all          Re-index every row, not just rows where searchKey is null.
+                 Use when the tokenization format has changed.
+  --batch-size   bulkWrite batch size (default 1000).
+`
+  )
+}
+
+if (argv.help) {
+  usage()
+  process.exit(0)
+}
+
+const BATCH_SIZE = Number(argv['batch-size'])
+if (!Number.isInteger(BATCH_SIZE) || BATCH_SIZE < 1) {
+  // A NaN batch size makes `ops.length >= BATCH_SIZE` always false, so every
+  // update would be buffered in memory and sent as a single bulkWrite.
+  logger.error(
+    { batchSize: argv['batch-size'] },
+    '--batch-size must be a positive integer'
+  )
+  process.exit(1)
+}
+
+/**
+ * Recomputes searchKey and fields for library references and writes them back
+ * in unordered bulkWrite batches of BATCH_SIZE.
+ *
+ * Without --all, only documents matching { searchKey: null } are visited.
+ * Without --commit, documents are still read and counted but nothing is
+ * written.
+ *
+ * @param {(message: string) => Promise<void>} trackProgress
+ */
+async function backfill(trackProgress) {
+  const filter = argv.all ? {} : { searchKey: null }
+  const cursor = db.libraryReferences
+    .find(filter)
+    .hint({ userId: 1, searchKey: 1 })
+    .project({ key: 1, fields: 1 })
+
+  let processed = 0
+  /** @type {AnyBulkWriteOperation[]} */
+  let ops = []
+
+  // Sends the buffered updates (when committing) and reports progress.
+  const flush = async () => {
+    if (ops.length === 0) return
+    if (argv.commit) {
+      await db.libraryReferences.bulkWrite(ops, { ordered: false })
+    }
+    processed += ops.length
+    await trackProgress(
+      `${argv.commit ? 'wrote' : '[dry-run]'} ${processed} docs`
+    )
+    ops = []
+  }
+
+  for await (const doc of cursor) {
+    const { searchKey, fields } = buildSearchFields({
+      key: doc.key,
+      fields: (doc.fields ?? []).map(
+        (/** @type {{ name: string; editableValue?: string }} */ f) => ({
+          name: f.name,
+          editableValue: f.editableValue ?? '',
+        })
+      ),
+    })
+    // NOTE(review): this replaces the whole `fields` array with the value
+    // returned by buildSearchFields, which only receives name/editableValue.
+    // Confirm that no other per-field properties need to be preserved.
+    ops.push({
+      updateOne: {
+        filter: { _id: doc._id },
+        update: { $set: { searchKey, fields } },
+      },
+    })
+    if (ops.length >= BATCH_SIZE) {
+      await flush()
+    }
+  }
+  await flush()
+  await trackProgress(`done; processed ${processed} docs`)
+}
+
+/**
+ * Removes searchKey and every fields[].searchValue from documents whose
+ * searchKey is not null. Without --commit, only reports the matching count.
+ *
+ * @param {(message: string) => Promise<void>} trackProgress
+ */
+async function rollback(trackProgress) {
+  if (!argv.commit) {
+    const count = await db.libraryReferences.countDocuments({
+      searchKey: { $ne: null },
+    })
+    await trackProgress(`[dry-run] would unset search fields on ${count} docs`)
+    return
+  }
+  const result = await db.libraryReferences.updateMany(
+    { searchKey: { $ne: null } },
+    { $unset: { searchKey: 1, 'fields.$[].searchValue': 1 } },
+    { hint: { userId: 1, searchKey: 1 } }
+  )
+  await trackProgress(`unset search fields on ${result.modifiedCount} docs`)
+}
+
+/**
+ * Script entry point: runs the rollback when --rollback is set, otherwise the
+ * backfill.
+ *
+ * @param {(message: string) => Promise<void>} trackProgress
+ */
+async function main(trackProgress) {
+  if (!argv.commit) {
+    await trackProgress('DRY RUN. Pass --commit to apply changes.')
+  }
+  if (argv.rollback) {
+    await rollback(trackProgress)
+  } else {
+    await backfill(trackProgress)
+  }
+}
+
+try {
+  await scriptRunner(main, {
+    commit: Boolean(argv.commit),
+    rollback: Boolean(argv.rollback),
+    all: Boolean(argv.all),
+    batchSize: BATCH_SIZE,
+  })
+  process.exit(0)
+} catch (err) {
+  logger.error({ err }, 'backfill failed')
+  process.exit(1)
+}
diff --git a/tools/migrations/20260507120000_create_libraryReferences_search_fields.mjs b/tools/migrations/20260507120000_create_libraryReferences_search_fields.mjs
new file mode 100644
index 0000000000..9ef1a5df45
--- /dev/null
+++ b/tools/migrations/20260507120000_create_libraryReferences_search_fields.mjs
@@ -0,0 +1,32 @@
+import Helpers from './lib/helpers.mjs'
+
+const tags = ['saas']
+
+const indexes = [
+  {
+    key: { userId: 1, searchKey: 1 },
+    name: 'userId_1_searchKey_1',
+    collation: { locale: 'en', strength: 1 },
+  },
+  {
+    key: { userId: 1, 'fields.searchValue': 1, 'fields.name': 1 },
+    name: 'userId_1_fields.searchValue_1_fields.name_1',
+    collation: { locale: 'en', strength: 1 },
+  },
+]
+
+const migrate = async client => {
+  const { db } = client
+  await Helpers.addIndexesToCollection(db.libraryReferences, indexes)
+}
+
+const rollback = async client => {
+  const { db } = client
+  await Helpers.dropIndexesFromCollection(db.libraryReferences, indexes)
+}
+
+export default {
+  tags,
+  migrate,
+  rollback,
+}