fix(typst): highlight keywords/idents inside #{} code blocks
Build and Deploy Verso / deploy (push) Successful in 9m32s
Build and Deploy Verso / deploy (push) Successful in 9m32s
Replace the opaque CodeBlockBody external tokenizer with grammar-parsed
codeStatement* so that keywords (show, let, set, …) and identifiers
inside #{ } code blocks receive proper Lezer nodes and are highlighted.
Key grammar changes:
- CodeBlock { "{" codeStatement* "}" } — structured, not opaque
- codeStatement uses two explicit alternatives for keyword lines:
      CodeKeyword !kw callOrValueAndBody   (grabs the subject eagerly)
      CodeKeyword keywordBody?             (bare keyword or body-only form)
  The !kw cut-point gives the shift precedence kw > 0 over the unannotated
  reduce, resolving the LALR merge ambiguity without @left/@right on kw.
- callOrValue { FuncExpr | CodeIdent | CodeString } — replaces CallExpr
  { CodeIdent !call callSuffix* }. The * quantifier annotated both the
  shift and the reduce with !call, making them a same-precedence tie that
  @right could not reliably resolve in merged states. Using FuncExpr
  (which requires callSuffixes) plus a bare CodeIdent makes the tie strict
  (call > 0 for the FuncExpr shift vs 0 for the bare-ident reduce); @right
  then handles only the extension-of-callSuffixes case (shift = call<<2,
  FuncExpr reduce = (call<<2) - 1 via the @right encoding).
- KeywordExpr gets the same two-alternative structure as codeStatement,
  so nested show/set/let inside a code block (e.g. show sel: set text)
  also parses without LALR state-merge conflicts.
- CallExpr removed; its role is split between FuncExpr (has args/chain)
  and bare CodeIdent (no args). Styling updated: CodeExpr/CodeIdent
  replaces CallExpr/CodeIdent for bare #ident function-style highlights.
- codeKeywordTokenizer and codeIdentTokenizer already accept keywords /
  identifiers after '{' and ';' (added in previous commit) — consistent
  with the new grammar.
Parse results:
  #{ show strong: link.with(url); body }
  → CodeKeyword "show", CodeIdent "strong", FuncExpr "link.with(url)",
    CodeIdent "body" — all properly highlighted, no ⚠ error nodes.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -49,11 +49,11 @@ export const TypstLanguage = LRLanguage.define({
|
||||
CodeBool: t.atom,
|
||||
|
||||
// Identifiers:
|
||||
// CallExpr/CodeIdent — top-level #func or after keywords (#set text) → function style
|
||||
// FuncExpr/CodeIdent — func call inside a value expr (has args/method) → function style
|
||||
// CodeExpr/CodeIdent — bare #func (no args) → function style
|
||||
// FuncExpr/CodeIdent — func call with args/method (#func(...), link.with(url)) → function style
|
||||
// CodeArgKey — named arg key (tokenizer pre-disambiguates on ':') → attributeName
|
||||
// CodeIdent — plain variable/constant reference (e.g. 'left', 'center') → variable
|
||||
'CallExpr/CodeIdent': t.function(t.variableName),
|
||||
'CodeExpr/CodeIdent': t.function(t.variableName),
|
||||
'FuncExpr/CodeIdent': t.function(t.variableName),
|
||||
CodeArgKey: t.attributeName,
|
||||
CodeIdent: t.variableName,
|
||||
|
||||
@@ -8,7 +8,6 @@ import {
|
||||
RawBlockBody,
|
||||
RawBlockClose,
|
||||
RawInlineContent,
|
||||
CodeBlockBody,
|
||||
BlockCommentBody,
|
||||
LineCommentContent,
|
||||
MathContent,
|
||||
@@ -35,6 +34,7 @@ const DOT = 46 // .
|
||||
const OPEN_PAREN = 40 // (
|
||||
const COMMA = 44 // ,
|
||||
const COLON = 58 // :
|
||||
const SEMICOLON = 59 // ;
|
||||
const OPEN_ANGLE = 60 // <
|
||||
const CLOSE_ANGLE = 62 // >
|
||||
|
||||
@@ -188,36 +188,6 @@ export const rawInlineTokenizer = new ExternalTokenizer(
|
||||
{ contextual: false }
|
||||
)
|
||||
|
||||
// ── codeBlockTokenizer ──────────────────────────────────────────────────
|
||||
// Emits CodeBlockBody — the interior of a #{ ... } code block.
|
||||
// Tracks brace nesting depth so that inner braces (e.g. #{ f({ x }) })
|
||||
// are included in the body rather than closing the outer block.
|
||||
export const codeBlockTokenizer = new ExternalTokenizer(
|
||||
(input, _stack) => {
|
||||
// The opening '{' has already been consumed by the grammar rule.
|
||||
let depth = 1
|
||||
let hasContent = false
|
||||
while (input.next !== -1) {
|
||||
const ch = input.next
|
||||
if (ch === OPEN_BRACE) {
|
||||
depth++
|
||||
input.advance()
|
||||
hasContent = true
|
||||
} else if (ch === CLOSE_BRACE) {
|
||||
if (depth === 1) break // leave this '}' for the grammar rule
|
||||
depth--
|
||||
input.advance()
|
||||
hasContent = true
|
||||
} else {
|
||||
input.advance()
|
||||
hasContent = true
|
||||
}
|
||||
}
|
||||
if (hasContent) input.acceptToken(CodeBlockBody)
|
||||
},
|
||||
{ contextual: false }
|
||||
)
|
||||
|
||||
// ── blockCommentTokenizer ───────────────────────────────────────────────
|
||||
// Emits BlockCommentBody — the interior of a /* ... */ comment.
|
||||
// Typst supports nested block comments (/* /* inner */ outer */), so this
|
||||
@@ -298,12 +268,12 @@ export const mathContentTokenizer = new ExternalTokenizer(
|
||||
export const codeKeywordTokenizer = new ExternalTokenizer(
|
||||
(input, stack) => {
|
||||
if (!stack.canShift(CodeKeyword)) return
|
||||
// Valid positions: immediately after '#' (normal #set, #show) or after ':'
|
||||
// (show-body: '#show sel: set text(...)'). Walk back past optional whitespace.
|
||||
// Valid positions: after '#', ':', '{' (code block start), or ';'.
|
||||
// Walk back past optional whitespace.
|
||||
let back = -1
|
||||
while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back--
|
||||
const kwPrev = input.peek(back)
|
||||
if (kwPrev !== HASH && kwPrev !== COLON) return
|
||||
if (kwPrev !== HASH && kwPrev !== COLON && kwPrev !== OPEN_BRACE && kwPrev !== SEMICOLON) return
|
||||
|
||||
// Peek ahead to read the full identifier without advancing.
|
||||
let len = 0
|
||||
@@ -355,16 +325,24 @@ export const codeIdentTokenizer = new ExternalTokenizer(
|
||||
const prev = input.peek(back)
|
||||
|
||||
if (prev !== HASH && prev !== DOT && prev !== OPEN_PAREN && prev !== COMMA && prev !== EQUALS && prev !== COLON) {
|
||||
// May be after a keyword chain like '#set text' or (in show body) 'set body':
|
||||
// scan back through the preceding identifier word, skip whitespace, and
|
||||
// verify '#' or ':' precedes it. Accepting ':' lets multi-word chains
|
||||
// like '#show sel: set text' find ':' before 'set'.
|
||||
if (!isIdentTail(prev)) return
|
||||
if (!isIdentTail(prev)) {
|
||||
// prev is a structural delimiter (e.g. ')' after a function call, '{' at
|
||||
// block start, '}' after a nested block). These are valid statement-start
|
||||
// positions inside a CodeBlock's codeStatement* list. Trust canShift —
|
||||
// it's reliable in the grammar-parsed code-block states.
|
||||
if (!couldBeIdent) return
|
||||
} else {
|
||||
// prev looks like the tail of a preceding word — scan back to find '#' or ':'.
|
||||
// Accepting ':' lets multi-word chains like 'show sel: set text' work.
|
||||
let b = back
|
||||
while (isIdentTail(input.peek(b))) b--
|
||||
while (input.peek(b) === SPACE || input.peek(b) === TAB || input.peek(b) === NEWLINE) b--
|
||||
const chainEnd = input.peek(b)
|
||||
if (chainEnd !== HASH && chainEnd !== COLON) return
|
||||
if (chainEnd !== HASH && chainEnd !== COLON) {
|
||||
// Could be second+ statement in a code block (e.g. after 'let x = 1').
|
||||
if (!couldBeIdent) return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// In arg-delimiter positions ('(' or ',') we may emit CodeArgKey regardless
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
// headingTitleTokenizer — HeadingTitle: the title text to end of line
|
||||
// rawTokenizer — triple-backtick raw block open/body/close
|
||||
// rawInlineTokenizer — single-backtick raw inline content
|
||||
// codeBlockTokenizer — brace-depth tracking inside #{ ... }
|
||||
// blockCommentTokenizer — depth-tracked nested /* ... */ comments
|
||||
// codeIdentTokenizer — CodeIdent: identifier, only fires in code context
|
||||
// strongBodyTokenizer — StrongBody: content inside *...*
|
||||
@@ -62,10 +61,16 @@ RawInline { "`" RawInlineContent? "`" }
|
||||
// #[ ... ] — content block (re-parses as markup items)
|
||||
CodeExpr { "#" codeExprBody }
|
||||
|
||||
// codeExprBody: forms valid after '#' in markup, or after ':' / '=' in a
|
||||
// keyword-body. FuncExpr handles ident+callSuffix(s); bare CodeIdent handles
|
||||
// a plain variable reference (#x). No CallExpr with callSuffix* here — that
|
||||
// *-quantifier makes both shift and reduce carry !call precedence (a tie that
|
||||
// @right cannot resolve reliably once codeStatement* state-merging is in play).
|
||||
codeExprBody {
|
||||
KeywordExpr |
|
||||
AtomExpr |
|
||||
CallExpr |
|
||||
FuncExpr |
|
||||
CodeIdent |
|
||||
CodeBlock |
|
||||
ContentBlock
|
||||
}
|
||||
@@ -73,18 +78,59 @@ codeExprBody {
|
||||
// callOrValue covers the subject of a keyword expression (#set text, #show link,
|
||||
// #import "pkg", #let name). keywordBody is exclusive: ':' for show-rule bodies
|
||||
// and '=' for let-binding values (a keyword expression never has both).
|
||||
KeywordExpr { CodeKeyword callOrValue? keywordBody? }
|
||||
callOrValue { CallExpr | CodeString }
|
||||
// Two precedences:
|
||||
// call @right — prefer extending callSuffixes (FuncExpr) over completing the
|
||||
// FuncExpr and letting '(' start a new statement. The `!call` marker
|
||||
// encodes the shift as (call << 2) and the FuncExpr reduce as
|
||||
// (call << 2) - 1 (due to @right); shift > reduce, so callSuffix
|
||||
// chains are greedily extended. Without @right both actions have
|
||||
// the same numeric precedence and the conflict is unresolved.
|
||||
// kw — prefer CodeKeyword !kw callOrValueAndBody over CodeKeyword keywordBody?
|
||||
// when an identifier follows the keyword. shift = kw << 2, reduce
|
||||
// (second alternative) = 0; kw > 0, no @right needed.
|
||||
@precedence { call @right, kw }
|
||||
|
||||
// KeywordExpr: used in markup-level code (#show, #let, #set …) AND nested
|
||||
// inside codeExprBody (e.g. the RHS after ':' in a show-rule).
|
||||
// Same two-alternative structure as codeStatement: the !kw on the first
|
||||
// alternative gives the shift prec kw > 0 over the unannotated reduce of the
|
||||
// second alternative (prec 0). This avoids the call-vs-call tie that arises
|
||||
// from the old `callOrValue?` optional pattern.
|
||||
KeywordExpr {
|
||||
CodeKeyword !kw callOrValueAndBody |
|
||||
CodeKeyword keywordBody?
|
||||
}
|
||||
|
||||
// callOrValue: FuncExpr for "ident(args)" / "ident.method", bare CodeIdent for
|
||||
// a plain name, CodeString for string subjects like #import "pkg".
|
||||
// FuncExpr requires at least one callSuffix, so at [CodeIdent ·] seeing '(':
|
||||
// SHIFT (start callSuffixes, prec call) vs REDUCE bare CodeIdent (prec 0).
|
||||
// call > 0 → shift wins cleanly.
|
||||
callOrValue { FuncExpr | CodeIdent | CodeString }
|
||||
keywordBody { ":" codeExprBody | "=" codeValue }
|
||||
AtomExpr { CodeBool }
|
||||
|
||||
// CallExpr allows zero suffixes — used at top level (#x) and after keywords
|
||||
// (#set text(...)) where even a bare identifier is valid as a named reference.
|
||||
CallExpr { CodeIdent callSuffix* }
|
||||
// FuncExpr requires at least one suffix — used inside codeValue so that
|
||||
// 'table(...)' gets tok-function while plain identifiers like 'left'/'center'
|
||||
// get tok-variableName instead of being false-positively styled as functions.
|
||||
FuncExpr { CodeIdent callSuffix+ }
|
||||
// codeStatement is the unit inside a CodeBlock's brace body.
|
||||
// Two explicit alternatives for the keyword case avoid the LALR ambiguity
|
||||
// that arises from codeStatement* merging when callOrValue? is optional.
|
||||
// The !kw annotation on the first alternative (shift callOrValueAndBody) has
|
||||
// higher precedence than the bare reduce of the second alternative (prec 0),
|
||||
// so 'show strong: …' grabs 'strong' as callOrValue rather than completing
|
||||
// KeywordExpr early with empty callOrValue.
|
||||
codeStatement {
|
||||
CodeKeyword !kw callOrValueAndBody |
|
||||
CodeKeyword keywordBody? |
|
||||
codeValue |
|
||||
";"
|
||||
}
|
||||
callOrValueAndBody { callOrValue keywordBody? }
|
||||
|
||||
// FuncExpr: identifier followed by one-or-more call suffixes.
|
||||
// callSuffixes uses explicit left-recursion (not +) so the !call annotation
|
||||
// on the recursive extension point gives the shift prec call vs the unannotated
|
||||
// reduce of codeValue → FuncExpr (prec 0) — shift wins, no @right tie.
|
||||
callSuffixes { callSuffix | callSuffixes !call callSuffix }
|
||||
FuncExpr { CodeIdent !call callSuffixes }
|
||||
callSuffix {
|
||||
CodeArgs |
|
||||
"." CodeIdent |
|
||||
@@ -114,8 +160,9 @@ codeValue {
|
||||
// Reuses codeArgList so named-key entries like (auto, 1fr) work too.
|
||||
CodeArray { "(" codeArgList? ")" }
|
||||
|
||||
// CodeBlockBody depth-tracks braces so #{ let x = { 1 } } parses correctly.
|
||||
CodeBlock { "{" CodeBlockBody? "}" }
|
||||
// CodeBlock parses its content as a codeStatement* list so that keywords
|
||||
// (show, let, set…) and identifiers inside braces receive proper highlighting.
|
||||
CodeBlock { "{" codeStatement* "}" }
|
||||
// ContentBlock re-enters markup mode, allowing #[*bold* text].
|
||||
ContentBlock { "[" item* "]" }
|
||||
|
||||
@@ -162,10 +209,6 @@ Escape { "\\" EscapeChar }
|
||||
RawInlineContent
|
||||
}
|
||||
|
||||
@external tokens codeBlockTokenizer from "./tokens.mjs" {
|
||||
CodeBlockBody
|
||||
}
|
||||
|
||||
@external tokens blockCommentTokenizer from "./tokens.mjs" {
|
||||
BlockCommentBody
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -6,42 +6,40 @@ export const
|
||||
RawBlockBody = 4,
|
||||
RawBlockClose = 5,
|
||||
RawInlineContent = 6,
|
||||
CodeBlockBody = 7,
|
||||
BlockCommentBody = 8,
|
||||
LineCommentContent = 9,
|
||||
MathContent = 10,
|
||||
CodeKeyword = 11,
|
||||
CodeIdent = 12,
|
||||
CodeArgKey = 13,
|
||||
StrongBody = 14,
|
||||
EmphBody = 15,
|
||||
Document = 16,
|
||||
Heading = 17,
|
||||
LineComment = 18,
|
||||
BlockComment = 19,
|
||||
RawBlock = 20,
|
||||
RawInline = 21,
|
||||
CodeExpr = 22,
|
||||
KeywordExpr = 23,
|
||||
CallExpr = 24,
|
||||
CodeArgs = 25,
|
||||
CodeString = 26,
|
||||
CodeNumber = 27,
|
||||
CodeBool = 28,
|
||||
FuncExpr = 29,
|
||||
ContentBlock = 30,
|
||||
CodeBlock = 31,
|
||||
InlineMath = 32,
|
||||
CodeArray = 33,
|
||||
AtomExpr = 34,
|
||||
Strong = 35,
|
||||
Emphasis = 36,
|
||||
Label = 37,
|
||||
LabelName = 38,
|
||||
Ref = 39,
|
||||
RefName = 40,
|
||||
Escape = 41,
|
||||
EscapeChar = 42,
|
||||
URL = 43,
|
||||
MarkupContent = 44,
|
||||
ClosingSquare = 45
|
||||
BlockCommentBody = 7,
|
||||
LineCommentContent = 8,
|
||||
MathContent = 9,
|
||||
CodeKeyword = 10,
|
||||
CodeIdent = 11,
|
||||
CodeArgKey = 12,
|
||||
StrongBody = 13,
|
||||
EmphBody = 14,
|
||||
Document = 15,
|
||||
Heading = 16,
|
||||
LineComment = 17,
|
||||
BlockComment = 18,
|
||||
RawBlock = 19,
|
||||
RawInline = 20,
|
||||
CodeExpr = 21,
|
||||
KeywordExpr = 22,
|
||||
FuncExpr = 23,
|
||||
CodeArgs = 24,
|
||||
CodeString = 25,
|
||||
CodeNumber = 26,
|
||||
CodeBool = 27,
|
||||
ContentBlock = 28,
|
||||
CodeBlock = 29,
|
||||
InlineMath = 30,
|
||||
CodeArray = 31,
|
||||
AtomExpr = 32,
|
||||
Strong = 33,
|
||||
Emphasis = 34,
|
||||
Label = 35,
|
||||
LabelName = 36,
|
||||
Ref = 37,
|
||||
RefName = 38,
|
||||
Escape = 39,
|
||||
EscapeChar = 40,
|
||||
URL = 41,
|
||||
MarkupContent = 42,
|
||||
ClosingSquare = 43
|
||||
|
||||
Reference in New Issue
Block a user