From 2db6e63162fc82f2967eacbe39c703ab110ff115 Mon Sep 17 00:00:00 2001
From: claude <claude@verso>
Date: Tue, 9 Jun 2026 12:38:05 +0000
Subject: [PATCH] typst: fix CodeArgKey detection using character-level context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

canShift(CodeArgKey) was consistently returning false because LALR
state merging folds the codeArgItem start state into others where
CodeArgKey is not in the valid set.  As a result, named arg keys like
'columns:', 'align:', 'caption:' were always falling through to
CodeIdent (black) instead of CodeArgKey (tok-attributeName).

Fix: detect named arg key position by inspecting the nearest
non-whitespace predecessor character instead of trusting canShift.
A predecessor of '(' or ',' means we are inside a call argument list — the
only positions where a named arg key can appear.  Any other predecessor,
such as the trailing 'w' of the keyword in '#show heading:', is not an
argument position, so that identifier is correctly not treated as a named arg.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../source-editor/lezer-typst/tokens.mjs       | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
index 6b50dffc42..23205040ba 100644
--- a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
+++ b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
@@ -324,9 +324,10 @@ export const codeKeywordTokenizer = new ExternalTokenizer(
 // handle them without conflict.
 export const codeIdentTokenizer = new ExternalTokenizer(
   (input, stack) => {
-    const couldBeKey   = stack.canShift(CodeArgKey)
     const couldBeIdent = stack.canShift(CodeIdent)
-    if (!couldBeKey && !couldBeIdent) return
+    // CodeArgKey is selected by character-level context (see below) because
+    // canShift is unreliable for it under LALR merging; it only gates this bail-out.
+    if (!couldBeIdent && !stack.canShift(CodeArgKey)) return
 
     // Guard: only fire in code context.
     // Walk back past whitespace to the nearest non-space character.
@@ -360,22 +361,25 @@ export const codeIdentTokenizer = new ExternalTokenizer(
     if (KEYWORDS.has(word) || BOOLS.has(word)) return
 
     // Emit CodeArgKey when this identifier is a named arg key (followed by ':').
-    // Pre-disambiguating here avoids relying on LALR lookahead to choose between
-    // codeArgItem alternatives, which is fragile under Lezer's state merging.
+    // canShift(CodeArgKey) is unreliable here because LALR state merging can
+    // fold the codeArgItem start state into others where CodeArgKey is absent.
+    // Instead, use character-level context: we are inside a call argument list
+    // only when the nearest non-whitespace predecessor is '(' or ','.  That
+    // covers every named-arg position (#set text(size: …), align: …, etc.)
+    // while excluding keyword positions like '#show heading: …' (prev = 'w').
     let isArgKey = false
-    if (couldBeKey) {
+    if (prev === OPEN_PAREN || prev === COMMA) {
       let afterLen = len
       while (input.peek(afterLen) === SPACE || input.peek(afterLen) === TAB) afterLen++
       isArgKey = (input.peek(afterLen) === COLON)
     }
 
     for (let i = 0; i < len; i++) input.advance()
-    if (isArgKey && couldBeKey) {
+    if (isArgKey) {
       input.acceptToken(CodeArgKey)
     } else if (couldBeIdent) {
       input.acceptToken(CodeIdent)
     }
-    // else: neither token is valid in this state — don't emit
   },
   { contextual: true }
 )