typst: fix CodeArgKey detection using character-level context

canShift(CodeArgKey) was consistently returning false because LALR state merging folds the codeArgItem start state into other states where CodeArgKey is not in the valid set. As a result, named arg keys like 'columns:', 'align:', 'caption:' always fell through to CodeIdent (black) instead of CodeArgKey (tok-attributeName). Fix: detect the named arg key position by inspecting the nearest non-whitespace predecessor character instead of trusting canShift. prev == '(' or ',' means we are inside a call argument list — the only positions where a named arg key can appear. In '#show heading:', prev is the last character of the preceding keyword (the 'w' of 'show'), so 'heading' is correctly excluded from being treated as a named arg. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-09 12:38:05 +00:00
parent f2b7034b51
commit 2db6e63162
1 changed files with 11 additions and 7 deletions
@@ -324,9 +324,10 @@ export const codeKeywordTokenizer = new ExternalTokenizer(
 // handle them without conflict.
 export const codeIdentTokenizer = new ExternalTokenizer(
  (input, stack) => {
-    const couldBeKey   = stack.canShift(CodeArgKey)
    const couldBeIdent = stack.canShift(CodeIdent)
-    if (!couldBeKey && !couldBeIdent) return
+    // CodeArgKey is detected at character level (see below). canShift is unreliable
+    // for it (LALR merging), so bail out only when neither token can be shifted.
+    if (!couldBeIdent && !stack.canShift(CodeArgKey)) return

    // Guard: only fire in code context.
    // Walk back past whitespace to the nearest non-space character.
@@ -360,22 +361,25 @@ export const codeIdentTokenizer = new ExternalTokenizer(
    if (KEYWORDS.has(word) || BOOLS.has(word)) return

    // Emit CodeArgKey when this identifier is a named arg key (followed by ':').
-    // Pre-disambiguating here avoids relying on LALR lookahead to choose between
-    // codeArgItem alternatives, which is fragile under Lezer's state merging.
+    // canShift(CodeArgKey) is unreliable here because LALR state merging can
+    // fold the codeArgItem start state into others where CodeArgKey is absent.
+    // Instead, use character-level context: we are inside a call argument list
+    // only when the nearest non-whitespace predecessor is '(' or ','.  That
+    // covers every named-arg position (#set text(size: …), align: …, etc.)
+    // while excluding keyword positions like '#show heading: …' (prev = 'w').
    let isArgKey = false
-    if (couldBeKey) {
+    if (prev === OPEN_PAREN || prev === COMMA) {
      let afterLen = len
      while (input.peek(afterLen) === SPACE || input.peek(afterLen) === TAB) afterLen++
      isArgKey = (input.peek(afterLen) === COLON)
    }

    for (let i = 0; i < len; i++) input.advance()
-    if (isArgKey && couldBeKey) {
+    if (isArgKey) {
      input.acceptToken(CodeArgKey)
    } else if (couldBeIdent) {
      input.acceptToken(CodeIdent)
    }
-    // else: neither token is valid in this state — don't emit
  },
  { contextual: true }
 )