From 47cf84f20b7fb486547cb3688d73d1b33763572a Mon Sep 17 00:00:00 2001 From: claude Date: Tue, 9 Jun 2026 13:57:10 +0000 Subject: [PATCH] fix(typst): highlight named arg keys after complex nested calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit canShift(CodeIdent) returns false in LALR-merged states that arise after reducing a complex first argument (e.g. figure(table(...), caption: ...)). The previous guard `!couldBeIdent && !canShift(CodeArgKey)` then caused an early exit before the character-level scan ran, silently dropping the CodeArgKey token for any named arg key that follows such a reduction. Fix: run the backward character scan first and derive `couldBeArgKey` from the raw predecessor char ('(' or ',') rather than from canShift. The early-exit now reads `!couldBeIdent && !couldBeArgKey`, so arg-key positions always proceed to the full scan regardless of parser state. Also stop calling canShift(CodeArgKey) entirely — it is unreliable here. Co-Authored-By: Claude Sonnet 4.6 --- .../source-editor/lezer-typst/tokens.mjs | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs index 23205040ba..65d3098f0d 100644 --- a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs +++ b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs @@ -322,26 +322,36 @@ export const codeKeywordTokenizer = new ExternalTokenizer( // // Keywords and bools are excluded so codeKeywordTokenizer / CodeBool can // handle them without conflict. +// +// The backward scan runs BEFORE any canShift gate. canShift(CodeArgKey) is +// unreliable (LALR state merging can suppress it even at genuine arg-key +// positions, e.g. 'caption:' after a complex nested call like 'table(...)'). +// We derive couldBeArgKey from character-level evidence ('(' or ',') and use +// that to decide whether to continue even when canShift(CodeIdent) is false. export const codeIdentTokenizer = new ExternalTokenizer( (input, stack) => { const couldBeIdent = stack.canShift(CodeIdent) - // CodeArgKey detection is character-level (see below) — canShift is - // unreliable for it due to LALR merging, so don't gate on it here. - if (!couldBeIdent && !stack.canShift(CodeArgKey)) return - // Guard: only fire in code context. - // Walk back past whitespace to the nearest non-space character. + // Walk back past whitespace — primary context discriminator. let back = -1 while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back-- const prev = input.peek(back) + if (prev !== HASH && prev !== DOT && prev !== OPEN_PAREN && prev !== COMMA) { // May be after a keyword like '#set' or '#show': scan back through the // keyword word itself and check that '#' immediately precedes it. if (!isIdentTail(prev)) return - while (isIdentTail(input.peek(back))) back-- - if (input.peek(back) !== HASH) return + let b = back + while (isIdentTail(input.peek(b))) b-- + if (input.peek(b) !== HASH) return } + // In arg-delimiter positions ('(' or ',') we may emit CodeArgKey regardless + // of canShift(CodeIdent) — LALR merging can suppress canShift(CodeIdent) + // after a complex first argument (e.g. figure(table(...), caption: ...)). + const couldBeArgKey = prev === OPEN_PAREN || prev === COMMA + if (!couldBeIdent && !couldBeArgKey) return + // Must start with an identifier head character. if (!isIdentHead(input.next)) return @@ -360,15 +370,10 @@ export const codeIdentTokenizer = new ExternalTokenizer( // Let codeKeywordTokenizer handle keywords; let CodeBool handle bools. if (KEYWORDS.has(word) || BOOLS.has(word)) return - // Emit CodeArgKey when this identifier is a named arg key (followed by ':'). - // canShift(CodeArgKey) is unreliable here because LALR state merging can - // fold the codeArgItem start state into others where CodeArgKey is absent. - // Instead, use character-level context: we are inside a call argument list - // only when the nearest non-whitespace predecessor is '(' or ','. That - // covers every named-arg position (#set text(size: …), align: …, etc.) - // while excluding keyword positions like '#show heading: …' (prev = 'g'). + // Emit CodeArgKey when this identifier is immediately followed by ':'. + // Only applies in arg-delimiter positions (couldBeArgKey). let isArgKey = false - if (prev === OPEN_PAREN || prev === COMMA) { + if (couldBeArgKey) { let afterLen = len while (input.peek(afterLen) === SPACE || input.peek(afterLen) === TAB) afterLen++ isArgKey = (input.peek(afterLen) === COLON)