typst: fix CodeArgKey detection using character-level context
Build and Deploy Verso / deploy (push) Successful in 9m31s
Build and Deploy Verso / deploy (push) Successful in 9m31s
canShift(CodeArgKey) was consistently returning false because LALR
state merging folds the codeArgItem start state into others where
CodeArgKey is not in the valid set. As a result, named arg keys like
'columns:', 'align:', 'caption:' were always falling through to
CodeIdent (black) instead of CodeArgKey (tok-attributeName).
Fix: detect named arg key position by inspecting the nearest
non-whitespace predecessor character instead of trusting canShift.
prev == '(' or ',' means we are inside a call argument list — the only
positions where a named arg key can appear. When prev is instead the
last character of a keyword (e.g. the 'w' of 'show'), the identifier is
not treated as a named arg key, so '#show heading:' is correctly excluded.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -324,9 +324,10 @@ export const codeKeywordTokenizer = new ExternalTokenizer(
|
||||
// handle them without conflict.
|
||||
export const codeIdentTokenizer = new ExternalTokenizer(
|
||||
(input, stack) => {
|
||||
const couldBeKey = stack.canShift(CodeArgKey)
|
||||
const couldBeIdent = stack.canShift(CodeIdent)
|
||||
if (!couldBeKey && !couldBeIdent) return
|
||||
// CodeArgKey detection is character-level (see below) — canShift is
|
||||
// unreliable for it due to LALR merging, so only bail out when CodeIdent is not shiftable either.
|
||||
if (!couldBeIdent && !stack.canShift(CodeArgKey)) return
|
||||
|
||||
// Guard: only fire in code context.
|
||||
// Walk back past whitespace to the nearest non-space character.
|
||||
@@ -360,22 +361,25 @@ export const codeIdentTokenizer = new ExternalTokenizer(
|
||||
if (KEYWORDS.has(word) || BOOLS.has(word)) return
|
||||
|
||||
// Emit CodeArgKey when this identifier is a named arg key (followed by ':').
|
||||
// Pre-disambiguating here avoids relying on LALR lookahead to choose between
|
||||
// codeArgItem alternatives, which is fragile under Lezer's state merging.
|
||||
// canShift(CodeArgKey) is unreliable here because LALR state merging can
|
||||
// fold the codeArgItem start state into others where CodeArgKey is absent.
|
||||
// Instead, use character-level context: we are inside a call argument list
|
||||
// only when the nearest non-whitespace predecessor is '(' or ','. That
|
||||
// covers every named-arg position (#set text(size: …), align: …, etc.)
|
||||
// while excluding keyword positions like '#show heading: …' (prev = 'w').
|
||||
let isArgKey = false
|
||||
if (couldBeKey) {
|
||||
if (prev === OPEN_PAREN || prev === COMMA) {
|
||||
let afterLen = len
|
||||
while (input.peek(afterLen) === SPACE || input.peek(afterLen) === TAB) afterLen++
|
||||
isArgKey = (input.peek(afterLen) === COLON)
|
||||
}
|
||||
|
||||
for (let i = 0; i < len; i++) input.advance()
|
||||
if (isArgKey && couldBeKey) {
|
||||
if (isArgKey) {
|
||||
input.acceptToken(CodeArgKey)
|
||||
} else if (couldBeIdent) {
|
||||
input.acceptToken(CodeIdent)
|
||||
}
|
||||
// else: neither token is valid in this state — don't emit
|
||||
},
|
||||
{ contextual: true }
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user