feat(typst): parse show-rule bodies, let-value bindings, and content-block call args
Build and Deploy Verso / deploy (push) Successful in 14m13s
Build and Deploy Verso / deploy (push) Successful in 14m13s
Three grammar gaps caused large blocks of code to be unhighlighted:
1. KeywordExpr now accepts an exclusive keywordBody: '#show sel: body' is
parsed via ':', and '#let name = value' via '='. callOrValue extends
the subject to include CodeString so '#import "pkg"' highlights the path.
2. ContentBlock is added to callSuffix so '#func("arg")[content]' and
'#next-step("url")[...]' parse their trailing content block as code
rather than falling back to markup.
3. Tokenizer: COLON added as a valid predecessor so identifiers (e.g. 'blue'
in 'fill: blue') and keywords (e.g. 'set' in '#show link: set text(...)')
are recognised after ':'. EQUALS was already added in the previous commit.
The ident-chain backward scan now also skips whitespace before testing for
'#' or ':', enabling 'text' in 'set text' to trace back to '#' through the
keyword gap. @precedence is updated with CodeString, '[', and ':' to resolve
overlapping-token conflicts with MarkupContent in merged states.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -285,8 +285,12 @@ export const mathContentTokenizer = new ExternalTokenizer(
|
||||
export const codeKeywordTokenizer = new ExternalTokenizer(
|
||||
(input, stack) => {
|
||||
if (!stack.canShift(CodeKeyword)) return
|
||||
// Only fire right after '#'; any other predecessor means we are in body text.
|
||||
if (input.peek(-1) !== HASH) return
|
||||
// Valid positions: immediately after '#' (normal #set, #show) or after ':'
|
||||
// (show-body: '#show sel: set text(...)'). Walk back past optional whitespace.
|
||||
let back = -1
|
||||
while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back--
|
||||
const kwPrev = input.peek(back)
|
||||
if (kwPrev !== HASH && kwPrev !== COLON) return
|
||||
|
||||
// Peek ahead to read the full identifier without advancing.
|
||||
let len = 0
|
||||
@@ -337,18 +341,23 @@ export const codeIdentTokenizer = new ExternalTokenizer(
|
||||
while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back--
|
||||
const prev = input.peek(back)
|
||||
|
||||
if (prev !== HASH && prev !== DOT && prev !== OPEN_PAREN && prev !== COMMA) {
|
||||
// May be after a keyword like '#set' or '#show': scan back through the
|
||||
// keyword word itself and check that '#' immediately precedes it.
|
||||
if (prev !== HASH && prev !== DOT && prev !== OPEN_PAREN && prev !== COMMA && prev !== EQUALS && prev !== COLON) {
|
||||
// May be after a keyword chain like '#set text' or (in show body) 'set body':
|
||||
// scan back through the preceding identifier word, skip whitespace, and
|
||||
// verify '#' or ':' precedes it. Accepting ':' lets multi-word chains
|
||||
// like '#show sel: set text' find ':' before 'set'.
|
||||
if (!isIdentTail(prev)) return
|
||||
let b = back
|
||||
while (isIdentTail(input.peek(b))) b--
|
||||
if (input.peek(b) !== HASH) return
|
||||
while (input.peek(b) === SPACE || input.peek(b) === TAB || input.peek(b) === NEWLINE) b--
|
||||
const chainEnd = input.peek(b)
|
||||
if (chainEnd !== HASH && chainEnd !== COLON) return
|
||||
}
|
||||
|
||||
// In arg-delimiter positions ('(' or ',') we may emit CodeArgKey regardless
|
||||
// of canShift(CodeIdent) — LALR merging can suppress canShift(CodeIdent)
|
||||
// after a complex first argument (e.g. figure(table(...), caption: ...)).
|
||||
// ':' and '=' are value positions, NOT arg-key positions.
|
||||
const couldBeArgKey = prev === OPEN_PAREN || prev === COMMA
|
||||
if (!couldBeIdent && !couldBeArgKey) return
|
||||
|
||||
|
||||
@@ -68,10 +68,12 @@ codeExprBody {
|
||||
ContentBlock
|
||||
}
|
||||
|
||||
// CallExpr? covers '#set text(size: 12pt)', '#show heading: ...', etc.
|
||||
// The optional CallExpr is only shifted when the next token is CodeIdent,
|
||||
// so there is no shift/reduce conflict with other items that follow keywords.
|
||||
KeywordExpr { CodeKeyword CallExpr? }
|
||||
// callOrValue covers the subject of a keyword expression (#set text, #show link,
|
||||
// #import "pkg", #let name). keywordBody is exclusive: ':' for show-rule bodies
|
||||
// and '=' for let-binding values (a keyword expression never has both).
|
||||
KeywordExpr { CodeKeyword callOrValue? keywordBody? }
|
||||
callOrValue { CallExpr | CodeString }
|
||||
keywordBody { ":" codeExprBody | "=" codeValue }
|
||||
AtomExpr { CodeBool }
|
||||
|
||||
// CallExpr allows zero suffixes — used at top level (#x) and after keywords
|
||||
@@ -83,7 +85,8 @@ CallExpr { CodeIdent callSuffix* }
|
||||
FuncExpr { CodeIdent callSuffix+ }
|
||||
callSuffix {
|
||||
CodeArgs |
|
||||
"." CodeIdent
|
||||
"." CodeIdent |
|
||||
ContentBlock
|
||||
}
|
||||
|
||||
CodeArgs { "(" codeArgList? ")" }
|
||||
@@ -235,7 +238,10 @@ Escape { "\\" EscapeChar }
|
||||
// by MarkupContent (redundant since '_' is in MarkupContent's exclusion
|
||||
// set, but kept for clarity).
|
||||
// CodeIdent and StrongText/EmphText are now external tokens — not listed.
|
||||
@precedence { CodeBool EscapeChar "(" "." "]" "_" spaces MarkupContent }
|
||||
// "[" > MarkupContent: ContentBlock callSuffix wins in merged code/markup states.
|
||||
// CodeString > MarkupContent: '"' starts a string literal after a keyword.
|
||||
// ":" > MarkupContent: keywordBody ':' wins over markup colon in code states.
|
||||
@precedence { CodeBool EscapeChar CodeString "[" ":" "(" "." "]" "_" spaces MarkupContent }
|
||||
}
|
||||
|
||||
@skip { spaces }
|
||||
|
||||
Reference in New Issue
Block a user