Fix Typst: bold/italic rendering and keyword false-highlights in body text
Build and Deploy Verso / deploy (push) Successful in 14m15s

Add .tok-strong and .tok-emphasis CSS to the static editor theme so
bold/italic markup actually renders visually.

Move CodeKeyword from @tokens to an external tokenizer (codeKeywordTokenizer)
with a peek(-1)==='#' guard. LALR state-merging causes code-mode states to be
reachable in markup positions, making common English words like "in", "for",
"while", "return" trigger CodeKeyword highlighting in body text. The '#' guard
ensures keywords only fire immediately after the '#' sigil, never in prose.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
claude
2026-06-08 20:20:02 +00:00
parent f5a94c0ced
commit f976c5ba92
3 changed files with 63 additions and 10 deletions
@@ -203,6 +203,9 @@ const staticTheme = EditorView.theme({
alignItems: 'center', alignItems: 'center',
fontWeight: 'normal', fontWeight: 'normal',
}, },
// Bold and italic markup (e.g. *strong* _emphasis_ in Typst and Markdown)
'.tok-strong': { fontWeight: 'bold' },
'.tok-emphasis': { fontStyle: 'italic' },
'.cm-selectionLayer': { '.cm-selectionLayer': {
zIndex: -10, zIndex: -10,
}, },
@@ -12,6 +12,7 @@ import {
BlockCommentBody, BlockCommentBody,
LineCommentContent, LineCommentContent,
MathContent, MathContent,
CodeKeyword,
} from './typst.terms.mjs' } from './typst.terms.mjs'
const BACKTICK = 96 // ` const BACKTICK = 96 // `
@@ -24,6 +25,14 @@ const TAB = 9 // \t
const DOLLAR = 36 // $ const DOLLAR = 36 // $
const OPEN_BRACE = 123 // { const OPEN_BRACE = 123 // {
const CLOSE_BRACE = 125 // } const CLOSE_BRACE = 125 // }
// Character code of the '#' sigil.
const HASH = 35 // #

// Words that codeKeywordTokenizer is allowed to emit as CodeKeyword.
// Membership is tested on the exact, case-sensitive word.
const KEYWORDS = new Set([
  // bindings, rules and module loading
  'let',
  'set',
  'show',
  'import',
  'include',
  // control flow
  'if',
  'else',
  'for',
  'while',
  'return',
  'break',
  'continue',
  // word operators
  'in',
  'as',
  'and',
  'or',
  'not',
  // contextual expressions
  'context',
])
// ── headingTokenizer ──────────────────────────────────────────────────── // ── headingTokenizer ────────────────────────────────────────────────────
// Emits HeadingMark — the "=+" prefix plus the trailing whitespace. // Emits HeadingMark — the "=+" prefix plus the trailing whitespace.
@@ -243,3 +252,47 @@ export const mathContentTokenizer = new ExternalTokenizer(
}, },
{ contextual: false } { contextual: false }
) )
// ── codeKeywordTokenizer ─────────────────────────────────────────────────
// Emits CodeKeyword (let, set, for, while, in, …) ONLY when the preceding
// character is '#', i.e. we are immediately after the '#' sigil in a CodeExpr.
//
// The peek(-1)==='#' guard is what prevents LALR state-merging from causing
// these tokens to fire in body-text positions. Common English words like
// "in", "for", "while", "return" appear in markup paragraphs; without the
// guard they would be highlighted as keywords due to LALR-merged states where
// CodeKeyword is technically in the valid set.
//
// The token is only accepted when the whole identifier-like run
// (A–Z, a–z, 0–9, '_' and '-') is a keyword, so "#let" matches while
// "#letter" and "#let-x" do not.
//
// NOTE(review): because of the guard, a keyword that does not directly follow
// '#' (e.g. `in` inside `#for x in xs`, or `else` after a block) is never
// emitted by this tokenizer — confirm the grammar covers those some other way.

// Length of the longest keyword. An identifier run longer than this can never
// be a keyword, so the scan stops there instead of reading (and later
// spreading into String.fromCharCode) an arbitrarily long run of characters.
const MAX_CODE_KEYWORD_LENGTH = Math.max(
  ...Array.from(KEYWORDS, (keyword) => keyword.length)
)

// True for the ASCII characters the keyword scan treats as part of a word.
// Anything else — including the -1 that peek() yields at end of input —
// ends the word.
const isKeywordScanChar = (ch) =>
  (ch >= 65 && ch <= 90) ||  // A–Z
  (ch >= 97 && ch <= 122) || // a–z
  (ch >= 48 && ch <= 57) ||  // 0–9
  ch === 95 ||               // _
  ch === 45                  // -

export const codeKeywordTokenizer = new ExternalTokenizer(
  (input, stack) => {
    if (!stack.canShift(CodeKeyword)) return

    // Only fire right after '#'; any other predecessor means we are in body text.
    if (input.peek(-1) !== HASH) return

    // Read the word by peeking only, so nothing is consumed unless it turns
    // out to be a keyword.
    let word = ''
    for (
      let ch = input.peek(0);
      isKeywordScanChar(ch);
      ch = input.peek(word.length)
    ) {
      // One more word character beyond the longest keyword: not a keyword.
      if (word.length === MAX_CODE_KEYWORD_LENGTH) return
      word += String.fromCharCode(ch)
    }

    // Also rejects the empty word (no identifier character after '#').
    if (!KEYWORDS.has(word)) return

    for (let i = 0; i < word.length; i++) input.advance()
    input.acceptToken(CodeKeyword)
  },
  { contextual: true }
)
@@ -158,6 +158,10 @@ Escape { "\\" EscapeChar }
MathContent MathContent
} }
// CodeKeyword is an external token: codeKeywordTokenizer (exported from
// ./tokens.mjs) decides when to emit it, and only does so directly after '#'.
@external tokens codeKeywordTokenizer from "./tokens.mjs" {
CodeKeyword
}
// ── Regular tokens ──────────────────────────────────────────────────────── // ── Regular tokens ────────────────────────────────────────────────────────
@tokens { @tokens {
// Horizontal whitespace only. Newlines are kept as explicit Newline items // Horizontal whitespace only. Newlines are kept as explicit Newline items
@@ -165,15 +169,6 @@ Escape { "\\" EscapeChar }
// reliably detect newlines in the raw input stream. // reliably detect newlines in the raw input stream.
spaces { $[ \t]+ } spaces { $[ \t]+ }
// Keywords take precedence over identifiers when they match fully
// (e.g. "let" → CodeKeyword, "letter" → CodeIdent).
CodeKeyword {
"let" | "set" | "show" | "import" | "include" |
"if" | "else" | "for" | "while" | "return" |
"break" | "continue" | "in" | "as" |
"and" | "or" | "not" | "context"
}
// Boolean / null literals — distinct from keywords for highlighting. // Boolean / null literals — distinct from keywords for highlighting.
CodeBool { "true" | "false" | "none" | "auto" } CodeBool { "true" | "false" | "none" | "auto" }
@@ -225,7 +220,9 @@ Escape { "\\" EscapeChar }
// the post-keyword state with markup states where "_" starts Emphasis. // the post-keyword state with markup states where "_" starts Emphasis.
// CodeIdent wins so '#set _name(...)' is tokenised correctly; in pure markup // CodeIdent wins so '#set _name(...)' is tokenised correctly; in pure markup
// states CodeIdent is not in the valid set so "_" still opens Emphasis. // states CodeIdent is not in the valid set so "_" still opens Emphasis.
@precedence { CodeKeyword CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText } // CodeKeyword is now an external token (codeKeywordTokenizer) and therefore
// not listed here — it uses a peek(-1)==='#' guard to stay out of markup.
@precedence { CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
} }
@skip { spaces } @skip { spaces }