typst: fix named-arg key highlighting and multi-line math
Build and Deploy Verso / deploy (push) Successful in 13m41s
Build and Deploy Verso / deploy (push) Successful in 13m41s
Named arg keys (columns:, align:, caption:) were appearing in black because LALR state merging broke the CodeArgs/CodeIdent path for multi-line expressions. Fix: emit a dedicated CodeArgKey token from codeIdentTokenizer (forward-peek past spaces/tabs for ':' to pre-disambiguate), declare it in the grammar's codeArgItem rule, and map it to t.attributeName in styleTags — bypassing LALR lookahead entirely.

Multi-line display math ($ ...\n... $) was consuming the rest of the document as orange text when contextual:true caused a backward scan to find a previous closing '$' and falsely set isDisplay=true. Fix: revert mathContentTokenizer to contextual:false with '\n' stop (each MathContent token covers one line), and change InlineMath to use MathContent* (instead of MathContent?) so @skip consumes the newlines between lines.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,9 +14,8 @@ import { typstDocumentOutline } from './document-outline'
|
||||
// Note on tree structure: rules starting with a lowercase letter in the grammar
|
||||
// are inline (no tree node), so their children are promoted to the parent.
|
||||
// E.g. codeArgItem, codeValue, callSuffix, codeArgList are all inline.
|
||||
// Therefore:
|
||||
// - The named-argument key "CodeIdent" is a *direct* child of CodeArgs.
|
||||
// - Positional arguments that are identifiers are wrapped in CallExpr.
|
||||
// Named arg keys emit CodeArgKey (not CodeIdent) via codeIdentTokenizer,
|
||||
// so CodeArgKey appears at the same level as other codeArgItem children.
|
||||
|
||||
export const TypstLanguage = LRLanguage.define({
|
||||
name: 'typst',
|
||||
@@ -51,10 +50,10 @@ export const TypstLanguage = LRLanguage.define({
|
||||
|
||||
// Identifiers:
|
||||
// - direct child of CallExpr → function/method name
|
||||
// - direct child of CodeArgs → named argument key (key: value syntax)
|
||||
// - everywhere else → plain variable
|
||||
// - CodeArgKey (named arg key, emitted by tokenizer before ':') → attributeName
|
||||
// - everywhere else → plain variable
|
||||
'CallExpr/CodeIdent': t.function(t.variableName),
|
||||
'CodeArgs/CodeIdent': t.attributeName,
|
||||
CodeArgKey: t.attributeName,
|
||||
CodeIdent: t.variableName,
|
||||
|
||||
// Literals in code mode
|
||||
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
MathContent,
|
||||
CodeKeyword,
|
||||
CodeIdent,
|
||||
CodeArgKey,
|
||||
StrongBody,
|
||||
EmphBody,
|
||||
} from './typst.terms.mjs'
|
||||
@@ -33,6 +34,7 @@ const UNDERSCORE = 95 // _
|
||||
const DOT = 46 // .
|
||||
const OPEN_PAREN = 40 // (
|
||||
const COMMA = 44 // ,
|
||||
const COLON = 58 // :
|
||||
|
||||
const KEYWORDS = new Set([
|
||||
'let', 'set', 'show', 'import', 'include',
|
||||
@@ -252,36 +254,23 @@ export const lineCommentContentTokenizer = new ExternalTokenizer(
|
||||
)
|
||||
|
||||
// ── mathContentTokenizer ────────────────────────────────────────────────
|
||||
// Emits MathContent — everything between the $...$ delimiters.
|
||||
// Emits MathContent — one line of content between the $...$ delimiters.
|
||||
// Stops at '$' or '\n' so each token is bounded to a single line.
|
||||
//
|
||||
// Typst distinguishes inline math ($x^2$) from display math ($ x^2 $):
|
||||
// display math has whitespace between the opening '$' and the content.
|
||||
// We detect this by scanning back to '$': if there is any whitespace
|
||||
// between '$' and the current position (i.e. @skip consumed it), the
|
||||
// tokenizer allows newlines so multi-line display math works. Inline math
|
||||
// keeps the newline stop, preventing a lone '$' from consuming the rest of
|
||||
// the document.
|
||||
//
|
||||
// contextual: true — only fires inside InlineMath after '$', never in
|
||||
// body text. The '$' token appears nowhere else in the grammar so the
|
||||
// post-'$' state does not merge with item* states.
|
||||
// The grammar uses MathContent* (not MathContent?) so multi-line display
|
||||
// math ($ ... \n ... $) is handled by multiple MathContent tokens, one per
|
||||
// line, with @skip consuming the newlines in between. This keeps each
|
||||
// token short and prevents a stray '$' from consuming the whole document.
|
||||
export const mathContentTokenizer = new ExternalTokenizer(
|
||||
(input, _stack) => {
|
||||
// Scan back to the opening '$', detecting display vs inline math.
|
||||
let back = -1
|
||||
while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back--
|
||||
if (input.peek(back) !== DOLLAR) return
|
||||
const isDisplay = back < -1 // whitespace between '$' and current position
|
||||
|
||||
let hasContent = false
|
||||
while (input.next !== -1 && input.next !== DOLLAR) {
|
||||
if (!isDisplay && input.next === NEWLINE) break
|
||||
while (input.next !== -1 && input.next !== DOLLAR && input.next !== NEWLINE) {
|
||||
input.advance()
|
||||
hasContent = true
|
||||
}
|
||||
if (hasContent) input.acceptToken(MathContent)
|
||||
},
|
||||
{ contextual: true }
|
||||
{ contextual: false }
|
||||
)
|
||||
|
||||
// ── codeKeywordTokenizer ─────────────────────────────────────────────────
|
||||
@@ -335,7 +324,9 @@ export const codeKeywordTokenizer = new ExternalTokenizer(
|
||||
// handle them without conflict.
|
||||
export const codeIdentTokenizer = new ExternalTokenizer(
|
||||
(input, stack) => {
|
||||
if (!stack.canShift(CodeIdent)) return
|
||||
const couldBeKey = stack.canShift(CodeArgKey)
|
||||
const couldBeIdent = stack.canShift(CodeIdent)
|
||||
if (!couldBeKey && !couldBeIdent) return
|
||||
|
||||
// Guard: only fire in code context.
|
||||
// Walk back past whitespace to the nearest non-space character.
|
||||
@@ -368,8 +359,18 @@ export const codeIdentTokenizer = new ExternalTokenizer(
|
||||
// Let codeKeywordTokenizer handle keywords; let CodeBool handle bools.
|
||||
if (KEYWORDS.has(word) || BOOLS.has(word)) return
|
||||
|
||||
// Emit CodeArgKey when this identifier is a named arg key (followed by ':').
|
||||
// Pre-disambiguating here avoids relying on LALR lookahead to choose between
|
||||
// codeArgItem alternatives, which is fragile under Lezer's state merging.
|
||||
let isArgKey = false
|
||||
if (couldBeKey) {
|
||||
let afterLen = len
|
||||
while (input.peek(afterLen) === SPACE || input.peek(afterLen) === TAB) afterLen++
|
||||
isArgKey = (input.peek(afterLen) === COLON)
|
||||
}
|
||||
|
||||
for (let i = 0; i < len; i++) input.advance()
|
||||
input.acceptToken(CodeIdent)
|
||||
input.acceptToken(isArgKey ? CodeArgKey : CodeIdent)
|
||||
},
|
||||
{ contextual: true }
|
||||
)
|
||||
|
||||
@@ -83,7 +83,7 @@ callSuffix {
|
||||
CodeArgs { "(" codeArgList? ")" }
|
||||
codeArgList { codeArgItem ("," codeArgItem)* ","? }
|
||||
codeArgItem {
|
||||
CodeIdent ":" codeValue |
|
||||
CodeArgKey ":" codeValue |
|
||||
codeValue
|
||||
}
|
||||
|
||||
@@ -104,7 +104,9 @@ ContentBlock { "[" item* "]" }
|
||||
|
||||
// ── Math ──────────────────────────────────────────────────────────────────
|
||||
// Both inline ($x^2$) and display ($ x^2 $) math use the same node type.
|
||||
InlineMath { "$" MathContent? "$" }
|
||||
// MathContent* (not ?) allows multi-line display math: each line becomes one
|
||||
// MathContent token (stopping at '\n'), and @skip consumes the newlines between.
|
||||
InlineMath { "$" MathContent* "$" }
|
||||
|
||||
// ── Markup formatting ─────────────────────────────────────────────────────
|
||||
// Strong and Emphasis use flat external body tokens (StrongBody / EmphBody)
|
||||
@@ -169,8 +171,12 @@ Escape { "\\" EscapeChar }
|
||||
// the token from firing in markup body text, where LALR state merging would
|
||||
// otherwise cause the entire token (including any leading '_') to be consumed
|
||||
// as a code identifier instead of letting '_' open an Emphasis.
|
||||
// CodeArgKey is emitted by the same tokenizer when an identifier is, after optional spaces/tabs,
|
||||
// followed by ':' — the tokenizer pre-disambiguates named arg keys so the LALR
|
||||
// parser does not need to choose between codeArgItem alternatives on lookahead.
|
||||
@external tokens codeIdentTokenizer from "./tokens.mjs" {
|
||||
CodeIdent
|
||||
CodeIdent,
|
||||
CodeArgKey
|
||||
}
|
||||
|
||||
@external tokens strongBodyTokenizer from "./tokens.mjs" {
|
||||
|
||||
Reference in New Issue
Block a user