fix(typst): highlight keywords/idents inside #{} code blocks
Build and Deploy Verso / deploy (push) Successful in 9m32s
Build and Deploy Verso / deploy (push) Successful in 9m32s
Replace the opaque CodeBlockBody external tokenizer with grammar-parsed
codeStatement* so that keywords (show, let, set, …) and identifiers
inside #{ } code blocks receive proper Lezer nodes and are highlighted.
Key grammar changes:
- CodeBlock { "{" codeStatement* "}" } — structured, not opaque
- codeStatement uses two explicit alternatives for keyword lines:
    CodeKeyword !kw callOrValueAndBody   (grabs the subject eagerly)
    CodeKeyword keywordBody?             (bare keyword or body-only form)
  The !kw cut-point gives the shift precedence kw (> 0), which beats the
  unannotated reduce (precedence 0), resolving the LALR merge ambiguity
  without needing @left/@right on kw.
- callOrValue { FuncExpr | CodeIdent | CodeString } — replaces CallExpr
{ CodeIdent !call callSuffix* }. The * quantifier annotated both
shift and reduce with !call, making them a same-prec tie that @right
could not reliably resolve in merged states. Using FuncExpr (required
callSuffixes) + bare CodeIdent makes the tie strict (call > 0 for
FuncExpr shift vs 0 for bare-ident reduce), then @right handles only
the extension-of-callSuffixes case (shift = call<<2, FuncExpr reduce
= call<<2 - 1 via @right encoding).
- KeywordExpr gets the same two-alternative structure as codeStatement
so nested show/set/let inside a code block (e.g. show sel: set text)
also parse without LALR state-merge conflicts.
- CallExpr removed; its role is split between FuncExpr (has args/chain)
and bare CodeIdent (no args). Styling updated: CodeExpr/CodeIdent
replaces CallExpr/CodeIdent for bare #ident function-style highlights.
- codeKeywordTokenizer now accepts keywords after { and ; as well as
after # and :, and codeIdentTokenizer now accepts identifiers at
statement-start positions inside a code block — consistent with
the new grammar.
Parse results:
#{ show strong: link.with(url); body }
→ CodeKeyword "show", CodeIdent "strong", FuncExpr "link.with(url)",
CodeIdent "body" — all properly highlighted, no ⚠ errors.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -49,11 +49,11 @@ export const TypstLanguage = LRLanguage.define({
|
|||||||
CodeBool: t.atom,
|
CodeBool: t.atom,
|
||||||
|
|
||||||
// Identifiers:
|
// Identifiers:
|
||||||
// CallExpr/CodeIdent — top-level #func or after keywords (#set text) → function style
|
// CodeExpr/CodeIdent — bare #func (no args) → function style
|
||||||
// FuncExpr/CodeIdent — func call inside a value expr (has args/method) → function style
|
// FuncExpr/CodeIdent — func call with args/method (#func(...), link.with(url)) → function style
|
||||||
// CodeArgKey — named arg key (tokenizer pre-disambiguates on ':') → attributeName
|
// CodeArgKey — named arg key (tokenizer pre-disambiguates on ':') → attributeName
|
||||||
// CodeIdent — plain variable/constant reference (e.g. 'left', 'center') → variable
|
// CodeIdent — plain variable/constant reference (e.g. 'left', 'center') → variable
|
||||||
'CallExpr/CodeIdent': t.function(t.variableName),
|
'CodeExpr/CodeIdent': t.function(t.variableName),
|
||||||
'FuncExpr/CodeIdent': t.function(t.variableName),
|
'FuncExpr/CodeIdent': t.function(t.variableName),
|
||||||
CodeArgKey: t.attributeName,
|
CodeArgKey: t.attributeName,
|
||||||
CodeIdent: t.variableName,
|
CodeIdent: t.variableName,
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import {
|
|||||||
RawBlockBody,
|
RawBlockBody,
|
||||||
RawBlockClose,
|
RawBlockClose,
|
||||||
RawInlineContent,
|
RawInlineContent,
|
||||||
CodeBlockBody,
|
|
||||||
BlockCommentBody,
|
BlockCommentBody,
|
||||||
LineCommentContent,
|
LineCommentContent,
|
||||||
MathContent,
|
MathContent,
|
||||||
@@ -35,6 +34,7 @@ const DOT = 46 // .
|
|||||||
const OPEN_PAREN = 40 // (
|
const OPEN_PAREN = 40 // (
|
||||||
const COMMA = 44 // ,
|
const COMMA = 44 // ,
|
||||||
const COLON = 58 // :
|
const COLON = 58 // :
|
||||||
|
const SEMICOLON = 59 // ;
|
||||||
const OPEN_ANGLE = 60 // <
|
const OPEN_ANGLE = 60 // <
|
||||||
const CLOSE_ANGLE = 62 // >
|
const CLOSE_ANGLE = 62 // >
|
||||||
|
|
||||||
@@ -188,36 +188,6 @@ export const rawInlineTokenizer = new ExternalTokenizer(
|
|||||||
{ contextual: false }
|
{ contextual: false }
|
||||||
)
|
)
|
||||||
|
|
||||||
// ── codeBlockTokenizer ──────────────────────────────────────────────────
|
|
||||||
// Emits CodeBlockBody — the interior of a #{ ... } code block.
|
|
||||||
// Tracks brace nesting depth so that inner braces (e.g. #{ f({ x }) })
|
|
||||||
// are included in the body rather than closing the outer block.
|
|
||||||
export const codeBlockTokenizer = new ExternalTokenizer(
|
|
||||||
(input, _stack) => {
|
|
||||||
// The opening '{' has already been consumed by the grammar rule.
|
|
||||||
let depth = 1
|
|
||||||
let hasContent = false
|
|
||||||
while (input.next !== -1) {
|
|
||||||
const ch = input.next
|
|
||||||
if (ch === OPEN_BRACE) {
|
|
||||||
depth++
|
|
||||||
input.advance()
|
|
||||||
hasContent = true
|
|
||||||
} else if (ch === CLOSE_BRACE) {
|
|
||||||
if (depth === 1) break // leave this '}' for the grammar rule
|
|
||||||
depth--
|
|
||||||
input.advance()
|
|
||||||
hasContent = true
|
|
||||||
} else {
|
|
||||||
input.advance()
|
|
||||||
hasContent = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (hasContent) input.acceptToken(CodeBlockBody)
|
|
||||||
},
|
|
||||||
{ contextual: false }
|
|
||||||
)
|
|
||||||
|
|
||||||
// ── blockCommentTokenizer ───────────────────────────────────────────────
|
// ── blockCommentTokenizer ───────────────────────────────────────────────
|
||||||
// Emits BlockCommentBody — the interior of a /* ... */ comment.
|
// Emits BlockCommentBody — the interior of a /* ... */ comment.
|
||||||
// Typst supports nested block comments (/* /* inner */ outer */), so this
|
// Typst supports nested block comments (/* /* inner */ outer */), so this
|
||||||
@@ -298,12 +268,12 @@ export const mathContentTokenizer = new ExternalTokenizer(
|
|||||||
export const codeKeywordTokenizer = new ExternalTokenizer(
|
export const codeKeywordTokenizer = new ExternalTokenizer(
|
||||||
(input, stack) => {
|
(input, stack) => {
|
||||||
if (!stack.canShift(CodeKeyword)) return
|
if (!stack.canShift(CodeKeyword)) return
|
||||||
// Valid positions: immediately after '#' (normal #set, #show) or after ':'
|
// Valid positions: after '#', ':', '{' (code block start), or ';'.
|
||||||
// (show-body: '#show sel: set text(...)'). Walk back past optional whitespace.
|
// Walk back past optional whitespace.
|
||||||
let back = -1
|
let back = -1
|
||||||
while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back--
|
while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back--
|
||||||
const kwPrev = input.peek(back)
|
const kwPrev = input.peek(back)
|
||||||
if (kwPrev !== HASH && kwPrev !== COLON) return
|
if (kwPrev !== HASH && kwPrev !== COLON && kwPrev !== OPEN_BRACE && kwPrev !== SEMICOLON) return
|
||||||
|
|
||||||
// Peek ahead to read the full identifier without advancing.
|
// Peek ahead to read the full identifier without advancing.
|
||||||
let len = 0
|
let len = 0
|
||||||
@@ -355,16 +325,24 @@ export const codeIdentTokenizer = new ExternalTokenizer(
|
|||||||
const prev = input.peek(back)
|
const prev = input.peek(back)
|
||||||
|
|
||||||
if (prev !== HASH && prev !== DOT && prev !== OPEN_PAREN && prev !== COMMA && prev !== EQUALS && prev !== COLON) {
|
if (prev !== HASH && prev !== DOT && prev !== OPEN_PAREN && prev !== COMMA && prev !== EQUALS && prev !== COLON) {
|
||||||
// May be after a keyword chain like '#set text' or (in show body) 'set body':
|
if (!isIdentTail(prev)) {
|
||||||
// scan back through the preceding identifier word, skip whitespace, and
|
// prev is a structural delimiter (e.g. ')' after a function call, '{' at
|
||||||
// verify '#' or ':' precedes it. Accepting ':' lets multi-word chains
|
// block start, '}' after a nested block). These are valid statement-start
|
||||||
// like '#show sel: set text' find ':' before 'set'.
|
// positions inside a CodeBlock's codeStatement* list. Trust canShift —
|
||||||
if (!isIdentTail(prev)) return
|
// it's reliable in the grammar-parsed code-block states.
|
||||||
let b = back
|
if (!couldBeIdent) return
|
||||||
while (isIdentTail(input.peek(b))) b--
|
} else {
|
||||||
while (input.peek(b) === SPACE || input.peek(b) === TAB || input.peek(b) === NEWLINE) b--
|
// prev looks like the tail of a preceding word — scan back to find '#' or ':'.
|
||||||
const chainEnd = input.peek(b)
|
// Accepting ':' lets multi-word chains like 'show sel: set text' work.
|
||||||
if (chainEnd !== HASH && chainEnd !== COLON) return
|
let b = back
|
||||||
|
while (isIdentTail(input.peek(b))) b--
|
||||||
|
while (input.peek(b) === SPACE || input.peek(b) === TAB || input.peek(b) === NEWLINE) b--
|
||||||
|
const chainEnd = input.peek(b)
|
||||||
|
if (chainEnd !== HASH && chainEnd !== COLON) {
|
||||||
|
// Could be second+ statement in a code block (e.g. after 'let x = 1').
|
||||||
|
if (!couldBeIdent) return
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// In arg-delimiter positions ('(' or ',') we may emit CodeArgKey regardless
|
// In arg-delimiter positions ('(' or ',') we may emit CodeArgKey regardless
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
// headingTitleTokenizer — HeadingTitle: the title text to end of line
|
// headingTitleTokenizer — HeadingTitle: the title text to end of line
|
||||||
// rawTokenizer — triple-backtick raw block open/body/close
|
// rawTokenizer — triple-backtick raw block open/body/close
|
||||||
// rawInlineTokenizer — single-backtick raw inline content
|
// rawInlineTokenizer — single-backtick raw inline content
|
||||||
// codeBlockTokenizer — brace-depth tracking inside #{ ... }
|
|
||||||
// blockCommentTokenizer — depth-tracked nested /* ... */ comments
|
// blockCommentTokenizer — depth-tracked nested /* ... */ comments
|
||||||
// codeIdentTokenizer — CodeIdent: identifier, only fires in code context
|
// codeIdentTokenizer — CodeIdent: identifier, only fires in code context
|
||||||
// strongBodyTokenizer — StrongBody: content inside *...*
|
// strongBodyTokenizer — StrongBody: content inside *...*
|
||||||
@@ -62,10 +61,16 @@ RawInline { "`" RawInlineContent? "`" }
|
|||||||
// #[ ... ] — content block (re-parses as markup items)
|
// #[ ... ] — content block (re-parses as markup items)
|
||||||
CodeExpr { "#" codeExprBody }
|
CodeExpr { "#" codeExprBody }
|
||||||
|
|
||||||
|
// codeExprBody: forms valid after '#' in markup, or after ':' in a
|
||||||
|
// keyword-body. FuncExpr handles ident+callSuffix(s); bare CodeIdent handles
|
||||||
|
// a plain variable reference (#x). No CallExpr with callSuffix* here — that
|
||||||
|
// *-quantifier makes both shift and reduce carry !call precedence (a tie that
|
||||||
|
// @right cannot resolve reliably once codeStatement* state-merging is in play).
|
||||||
codeExprBody {
|
codeExprBody {
|
||||||
KeywordExpr |
|
KeywordExpr |
|
||||||
AtomExpr |
|
AtomExpr |
|
||||||
CallExpr |
|
FuncExpr |
|
||||||
|
CodeIdent |
|
||||||
CodeBlock |
|
CodeBlock |
|
||||||
ContentBlock
|
ContentBlock
|
||||||
}
|
}
|
||||||
@@ -73,18 +78,59 @@ codeExprBody {
|
|||||||
// callOrValue covers the subject of a keyword expression (#set text, #show link,
|
// callOrValue covers the subject of a keyword expression (#set text, #show link,
|
||||||
// #import "pkg", #let name). keywordBody is exclusive: ':' for show-rule bodies
|
// #import "pkg", #let name). keywordBody is exclusive: ':' for show-rule bodies
|
||||||
// and '=' for let-binding values (a keyword expression never has both).
|
// and '=' for let-binding values (a keyword expression never has both).
|
||||||
KeywordExpr { CodeKeyword callOrValue? keywordBody? }
|
// Two precedences:
|
||||||
callOrValue { CallExpr | CodeString }
|
// call @right — prefer extending callSuffixes (FuncExpr) over completing the
|
||||||
|
// FuncExpr and letting '(' start a new statement. The `!call` marker
|
||||||
|
// encodes the shift as (call << 2) and the FuncExpr reduce as
|
||||||
|
// (call << 2) - 1 (due to @right); shift > reduce, so callSuffix
|
||||||
|
// chains are greedily extended. Without @right both actions have
|
||||||
|
// the same numeric precedence and the conflict is unresolved.
|
||||||
|
// kw — prefer CodeKeyword !kw callOrValueAndBody over CodeKeyword keywordBody?
|
||||||
|
// when an identifier follows the keyword. shift = kw << 2, reduce
|
||||||
|
// (second alternative) = 0; kw > 0, no @right needed.
|
||||||
|
@precedence { call @right, kw }
|
||||||
|
|
||||||
|
// KeywordExpr: used in markup-level code (#show, #let, #set …) AND nested
|
||||||
|
// inside codeExprBody (e.g. the RHS after ':' in a show-rule).
|
||||||
|
// Same two-alternative structure as codeStatement: the !kw on the first
|
||||||
|
// alternative gives the shift prec kw > 0 over the unannotated reduce of the
|
||||||
|
// second alternative (prec 0). This avoids the call-vs-call tie that arises
|
||||||
|
// from the old `callOrValue?` optional pattern.
|
||||||
|
KeywordExpr {
|
||||||
|
CodeKeyword !kw callOrValueAndBody |
|
||||||
|
CodeKeyword keywordBody?
|
||||||
|
}
|
||||||
|
|
||||||
|
// callOrValue: FuncExpr for "ident(args)" / "ident.method", bare CodeIdent for
|
||||||
|
// a plain name, CodeString for string subjects like #import "pkg".
|
||||||
|
// FuncExpr requires at least one callSuffix, so at [CodeIdent ·] seeing '(':
|
||||||
|
// SHIFT (start callSuffixes, prec call) vs REDUCE bare CodeIdent (prec 0).
|
||||||
|
// call > 0 → shift wins cleanly.
|
||||||
|
callOrValue { FuncExpr | CodeIdent | CodeString }
|
||||||
keywordBody { ":" codeExprBody | "=" codeValue }
|
keywordBody { ":" codeExprBody | "=" codeValue }
|
||||||
AtomExpr { CodeBool }
|
AtomExpr { CodeBool }
|
||||||
|
|
||||||
// CallExpr allows zero suffixes — used at top level (#x) and after keywords
|
// codeStatement is the unit inside a CodeBlock's brace body.
|
||||||
// (#set text(...)) where even a bare identifier is valid as a named reference.
|
// Two explicit alternatives for the keyword case avoid the LALR ambiguity
|
||||||
CallExpr { CodeIdent callSuffix* }
|
// that arises from codeStatement* merging when callOrValue? is optional.
|
||||||
// FuncExpr requires at least one suffix — used inside codeValue so that
|
// The !kw annotation on the first alternative (shift callOrValueAndBody) has
|
||||||
// 'table(...)' gets tok-function while plain identifiers like 'left'/'center'
|
// higher precedence than the bare reduce of the second alternative (prec 0),
|
||||||
// get tok-variableName instead of being false-positively styled as functions.
|
// so 'show strong: …' grabs 'strong' as callOrValue rather than completing
|
||||||
FuncExpr { CodeIdent callSuffix+ }
|
// the keyword statement early with an empty callOrValue.
|
||||||
|
codeStatement {
|
||||||
|
CodeKeyword !kw callOrValueAndBody |
|
||||||
|
CodeKeyword keywordBody? |
|
||||||
|
codeValue |
|
||||||
|
";"
|
||||||
|
}
|
||||||
|
callOrValueAndBody { callOrValue keywordBody? }
|
||||||
|
|
||||||
|
// FuncExpr: identifier followed by one-or-more call suffixes.
|
||||||
|
// callSuffixes uses explicit left-recursion (not +) so the !call annotation
|
||||||
|
// on the recursive extension point gives the shift prec call vs the unannotated
|
||||||
|
// reduce of codeValue → FuncExpr (prec 0) — shift wins, no @right tie.
|
||||||
|
callSuffixes { callSuffix | callSuffixes !call callSuffix }
|
||||||
|
FuncExpr { CodeIdent !call callSuffixes }
|
||||||
callSuffix {
|
callSuffix {
|
||||||
CodeArgs |
|
CodeArgs |
|
||||||
"." CodeIdent |
|
"." CodeIdent |
|
||||||
@@ -114,8 +160,9 @@ codeValue {
|
|||||||
// Reuses codeArgList so named-key entries like (auto, 1fr) work too.
|
// Reuses codeArgList so named-key entries like (auto, 1fr) work too.
|
||||||
CodeArray { "(" codeArgList? ")" }
|
CodeArray { "(" codeArgList? ")" }
|
||||||
|
|
||||||
// CodeBlockBody depth-tracks braces so #{ let x = { 1 } } parses correctly.
|
// CodeBlock parses its content as a codeStatement* list so that keywords
|
||||||
CodeBlock { "{" CodeBlockBody? "}" }
|
// (show, let, set…) and identifiers inside braces receive proper highlighting.
|
||||||
|
CodeBlock { "{" codeStatement* "}" }
|
||||||
// ContentBlock re-enters markup mode, allowing #[*bold* text].
|
// ContentBlock re-enters markup mode, allowing #[*bold* text].
|
||||||
ContentBlock { "[" item* "]" }
|
ContentBlock { "[" item* "]" }
|
||||||
|
|
||||||
@@ -162,10 +209,6 @@ Escape { "\\" EscapeChar }
|
|||||||
RawInlineContent
|
RawInlineContent
|
||||||
}
|
}
|
||||||
|
|
||||||
@external tokens codeBlockTokenizer from "./tokens.mjs" {
|
|
||||||
CodeBlockBody
|
|
||||||
}
|
|
||||||
|
|
||||||
@external tokens blockCommentTokenizer from "./tokens.mjs" {
|
@external tokens blockCommentTokenizer from "./tokens.mjs" {
|
||||||
BlockCommentBody
|
BlockCommentBody
|
||||||
}
|
}
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -6,42 +6,40 @@ export const
|
|||||||
RawBlockBody = 4,
|
RawBlockBody = 4,
|
||||||
RawBlockClose = 5,
|
RawBlockClose = 5,
|
||||||
RawInlineContent = 6,
|
RawInlineContent = 6,
|
||||||
CodeBlockBody = 7,
|
BlockCommentBody = 7,
|
||||||
BlockCommentBody = 8,
|
LineCommentContent = 8,
|
||||||
LineCommentContent = 9,
|
MathContent = 9,
|
||||||
MathContent = 10,
|
CodeKeyword = 10,
|
||||||
CodeKeyword = 11,
|
CodeIdent = 11,
|
||||||
CodeIdent = 12,
|
CodeArgKey = 12,
|
||||||
CodeArgKey = 13,
|
StrongBody = 13,
|
||||||
StrongBody = 14,
|
EmphBody = 14,
|
||||||
EmphBody = 15,
|
Document = 15,
|
||||||
Document = 16,
|
Heading = 16,
|
||||||
Heading = 17,
|
LineComment = 17,
|
||||||
LineComment = 18,
|
BlockComment = 18,
|
||||||
BlockComment = 19,
|
RawBlock = 19,
|
||||||
RawBlock = 20,
|
RawInline = 20,
|
||||||
RawInline = 21,
|
CodeExpr = 21,
|
||||||
CodeExpr = 22,
|
KeywordExpr = 22,
|
||||||
KeywordExpr = 23,
|
FuncExpr = 23,
|
||||||
CallExpr = 24,
|
CodeArgs = 24,
|
||||||
CodeArgs = 25,
|
CodeString = 25,
|
||||||
CodeString = 26,
|
CodeNumber = 26,
|
||||||
CodeNumber = 27,
|
CodeBool = 27,
|
||||||
CodeBool = 28,
|
ContentBlock = 28,
|
||||||
FuncExpr = 29,
|
CodeBlock = 29,
|
||||||
ContentBlock = 30,
|
InlineMath = 30,
|
||||||
CodeBlock = 31,
|
CodeArray = 31,
|
||||||
InlineMath = 32,
|
AtomExpr = 32,
|
||||||
CodeArray = 33,
|
Strong = 33,
|
||||||
AtomExpr = 34,
|
Emphasis = 34,
|
||||||
Strong = 35,
|
Label = 35,
|
||||||
Emphasis = 36,
|
LabelName = 36,
|
||||||
Label = 37,
|
Ref = 37,
|
||||||
LabelName = 38,
|
RefName = 38,
|
||||||
Ref = 39,
|
Escape = 39,
|
||||||
RefName = 40,
|
EscapeChar = 40,
|
||||||
Escape = 41,
|
URL = 41,
|
||||||
EscapeChar = 42,
|
MarkupContent = 42,
|
||||||
URL = 43,
|
ClosingSquare = 43
|
||||||
MarkupContent = 44,
|
|
||||||
ClosingSquare = 45
|
|
||||||
|
|||||||
Reference in New Issue
Block a user