typst: fix CodeArgKey detection using character-level context
Build and Deploy Verso / deploy (push) Successful in 9m31s

canShift(CodeArgKey) was consistently returning false because LALR
state merging folds the codeArgItem start state into others where
CodeArgKey is not in the valid set.  As a result, named arg keys like
'columns:', 'align:', 'caption:' were always falling through to
CodeIdent (black) instead of CodeArgKey (tok-attributeName).

Fix: detect named arg key position by inspecting the nearest
non-whitespace predecessor character instead of trusting canShift.
prev == '(' or ',' means we are inside a call argument list — the only
positions where a named arg key can appear.  Any other predecessor, such
as the last character of a preceding keyword (e.g. the 'w' of 'show'),
excludes forms like '#show heading:' from being treated as a named arg.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
claude
2026-06-09 12:38:05 +00:00
parent f2b7034b51
commit 2db6e63162
@@ -324,9 +324,10 @@ export const codeKeywordTokenizer = new ExternalTokenizer(
// handle them without conflict.
export const codeIdentTokenizer = new ExternalTokenizer(
(input, stack) => {
const couldBeKey = stack.canShift(CodeArgKey)
const couldBeIdent = stack.canShift(CodeIdent)
if (!couldBeKey && !couldBeIdent) return
// CodeArgKey detection is character-level (see below) — canShift is
// unreliable for it due to LALR merging, so don't gate on it here.
if (!couldBeIdent && !stack.canShift(CodeArgKey)) return
// Guard: only fire in code context.
// Walk back past whitespace to the nearest non-space character.
@@ -360,22 +361,25 @@ export const codeIdentTokenizer = new ExternalTokenizer(
if (KEYWORDS.has(word) || BOOLS.has(word)) return
// Emit CodeArgKey when this identifier is a named arg key (followed by ':').
// Pre-disambiguating here avoids relying on LALR lookahead to choose between
// codeArgItem alternatives, which is fragile under Lezer's state merging.
// canShift(CodeArgKey) is unreliable here because LALR state merging can
// fold the codeArgItem start state into others where CodeArgKey is absent.
// Instead, use character-level context: we are inside a call argument list
// only when the nearest non-whitespace predecessor is '(' or ','. That
// covers every named-arg position (#set text(size: …), align: …, etc.)
// while excluding keyword positions like '#show heading: …' (prev = 'w', the last char of 'show').
let isArgKey = false
if (couldBeKey) {
if (prev === OPEN_PAREN || prev === COMMA) {
let afterLen = len
while (input.peek(afterLen) === SPACE || input.peek(afterLen) === TAB) afterLen++
isArgKey = (input.peek(afterLen) === COLON)
}
for (let i = 0; i < len; i++) input.advance()
if (isArgKey && couldBeKey) {
if (isArgKey) {
input.acceptToken(CodeArgKey)
} else if (couldBeIdent) {
input.acceptToken(CodeIdent)
}
// else: neither token is valid in this state — don't emit
},
{ contextual: true }
)