From 2db6e63162fc82f2967eacbe39c703ab110ff115 Mon Sep 17 00:00:00 2001 From: claude Date: Tue, 9 Jun 2026 12:38:05 +0000 Subject: [PATCH] typst: fix CodeArgKey detection using character-level context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit canShift(CodeArgKey) was consistently returning false because LALR state merging folds the codeArgItem start state into others where CodeArgKey is not in the valid set. As a result, named arg keys like 'columns:', 'align:', 'caption:' were always falling through to CodeIdent (black) instead of CodeArgKey (tok-attributeName). Fix: detect named arg key position by inspecting the nearest non-whitespace predecessor character instead of trusting canShift. prev == '(' or ',' means we are inside a call argument list — the only positions where a named arg key can appear. When prev is instead the last character of a keyword (e.g. the 'w' of 'show'), the identifier is not in an argument list, which correctly excludes '#show heading:' from being treated as a named arg. Co-Authored-By: Claude Sonnet 4.6 --- .../source-editor/lezer-typst/tokens.mjs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs index 6b50dffc42..23205040ba 100644 --- a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs +++ b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs @@ -324,9 +324,10 @@ export const codeKeywordTokenizer = new ExternalTokenizer( // handle them without conflict. export const codeIdentTokenizer = new ExternalTokenizer( (input, stack) => { - const couldBeKey = stack.canShift(CodeArgKey) const couldBeIdent = stack.canShift(CodeIdent) - if (!couldBeKey && !couldBeIdent) return + // CodeArgKey detection is character-level (see below) — canShift is + // unreliable for it due to LALR merging, so it only feeds the early exit below. 
+ if (!couldBeIdent && !stack.canShift(CodeArgKey)) return // Guard: only fire in code context. // Walk back past whitespace to the nearest non-space character. @@ -360,22 +361,25 @@ export const codeIdentTokenizer = new ExternalTokenizer( if (KEYWORDS.has(word) || BOOLS.has(word)) return // Emit CodeArgKey when this identifier is a named arg key (followed by ':'). - // Pre-disambiguating here avoids relying on LALR lookahead to choose between - // codeArgItem alternatives, which is fragile under Lezer's state merging. + // canShift(CodeArgKey) is unreliable here because LALR state merging can + // fold the codeArgItem start state into others where CodeArgKey is absent. + // Instead, use character-level context: we are inside a call argument list + // only when the nearest non-whitespace predecessor is '(' or ','. That + // covers every named-arg position (#set text(size: …), align: …, etc.) + // while excluding keyword positions like '#show heading: …' (prev = 'w'). let isArgKey = false - if (couldBeKey) { + if (prev === OPEN_PAREN || prev === COMMA) { let afterLen = len while (input.peek(afterLen) === SPACE || input.peek(afterLen) === TAB) afterLen++ isArgKey = (input.peek(afterLen) === COLON) } for (let i = 0; i < len; i++) input.advance() - if (isArgKey && couldBeKey) { + if (isArgKey) { input.acceptToken(CodeArgKey) } else if (couldBeIdent) { input.acceptToken(CodeIdent) } - // else: neither token is valid in this state — don't emit }, { contextual: true } )