Fix Typst: bold/italic rendering and keyword false-highlights in body text
Build and Deploy Verso / deploy (push) Successful in 14m15s
Build and Deploy Verso / deploy (push) Successful in 14m15s
Add .tok-strong and .tok-emphasis CSS to the static editor theme so bold/italic markup actually renders visually. Move CodeKeyword from @tokens to an external tokenizer (codeKeywordTokenizer) with a peek(-1)==='#' guard. LALR state-merging causes code-mode states to be reachable in markup positions, making common English words like "in", "for", "while", "return" trigger CodeKeyword highlighting in body text. The '#' guard ensures keywords only fire immediately after the '#' sigil, never in prose. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -203,6 +203,9 @@ const staticTheme = EditorView.theme({
|
||||
alignItems: 'center',
|
||||
fontWeight: 'normal',
|
||||
},
|
||||
// Bold and italic markup (e.g. *strong* _emphasis_ in Typst and Markdown)
|
||||
'.tok-strong': { fontWeight: 'bold' },
|
||||
'.tok-emphasis': { fontStyle: 'italic' },
|
||||
'.cm-selectionLayer': {
|
||||
zIndex: -10,
|
||||
},
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
BlockCommentBody,
|
||||
LineCommentContent,
|
||||
MathContent,
|
||||
CodeKeyword,
|
||||
} from './typst.terms.mjs'
|
||||
|
||||
const BACKTICK = 96 // `
|
||||
@@ -24,6 +25,14 @@ const TAB = 9 // \t
|
||||
const DOLLAR = 36 // $
|
||||
const OPEN_BRACE = 123 // {
|
||||
const CLOSE_BRACE = 125 // }
|
||||
const HASH = 35 // #
|
||||
|
||||
// Words that codeKeywordTokenizer accepts as CodeKeyword. Lookup is by exact,
// case-sensitive match on the whole identifier (so "letter" is not "let").
const KEYWORDS = new Set([
  // bindings, rules and module loading
  'let',
  'set',
  'show',
  'import',
  'include',
  // control flow
  'if',
  'else',
  'for',
  'while',
  'return',
  'break',
  'continue',
  // connectives used inside loops and imports
  'in',
  'as',
  // logical operators
  'and',
  'or',
  'not',
  // contextual expressions
  'context',
])
|
||||
|
||||
// ── headingTokenizer ────────────────────────────────────────────────────
|
||||
// Emits HeadingMark — the "=+" prefix plus the trailing whitespace.
|
||||
@@ -243,3 +252,47 @@ export const mathContentTokenizer = new ExternalTokenizer(
|
||||
},
|
||||
{ contextual: false }
|
||||
)
|
||||
|
||||
// ── codeKeywordTokenizer ─────────────────────────────────────────────────
// Emits CodeKeyword (let, set, for, while, in, …) ONLY when the preceding
// character is '#', i.e. we are immediately after the '#' sigil in a CodeExpr.
//
// The peek(-1)==='#' guard is what prevents LALR state-merging from causing
// these tokens to fire in body-text positions. Common English words like
// "in", "for", "while", "return" appear in markup paragraphs; without the
// guard they would be highlighted as keywords due to LALR-merged states where
// CodeKeyword is technically in the valid set.
//
// NOTE(review): words such as "in", "else", "as", "and", "or" and "not" do not
// normally sit directly after '#', so with this guard they presumably never
// match. Confirm that is intended before relying on them being highlighted.

// Length of the longest entry in KEYWORDS. An identifier longer than this can
// never be a keyword, so the scan below gives up instead of reading on.
const KEYWORD_MAX_LENGTH = Math.max(...Array.from(KEYWORDS, (kw) => kw.length))

// True when `ch` is a code unit the scan treats as part of an identifier.
const isKeywordIdentChar = (ch) =>
  (ch >= 65 && ch <= 90) ||  // A–Z
  (ch >= 97 && ch <= 122) || // a–z
  (ch >= 48 && ch <= 57) ||  // 0–9
  ch === 95 ||               // _
  ch === 45                  // -

export const codeKeywordTokenizer = new ExternalTokenizer(
  (input, stack) => {
    if (!stack.canShift(CodeKeyword)) return
    // Only fire right after '#'; any other predecessor means we are in body text.
    if (input.peek(-1) !== HASH) return

    // Read the identifier by peeking only, so nothing is consumed unless the
    // whole word turns out to be a keyword. One character past the longest
    // keyword is enough to rule a word out, which keeps the lookahead short.
    const codes = []
    for (let ch = input.peek(0); isKeywordIdentChar(ch); ch = input.peek(codes.length)) {
      if (codes.length === KEYWORD_MAX_LENGTH) return
      codes.push(ch)
    }

    if (codes.length === 0) return
    if (!KEYWORDS.has(String.fromCharCode(...codes))) return

    // Consume exactly the keyword and emit it.
    for (let i = 0; i < codes.length; i++) input.advance()
    input.acceptToken(CodeKeyword)
  },
  // The result depends on stack.canShift, so the tokenizer is contextual.
  { contextual: true }
)
|
||||
|
||||
@@ -158,6 +158,10 @@ Escape { "\\" EscapeChar }
|
||||
MathContent
|
||||
}
|
||||
|
||||
// CodeKeyword is produced by the external tokenizer in tokens.mjs rather than
// by a rule in @tokens.
@external tokens codeKeywordTokenizer from "./tokens.mjs" {
  CodeKeyword
}
|
||||
|
||||
// ── Regular tokens ────────────────────────────────────────────────────────
|
||||
@tokens {
|
||||
// Horizontal whitespace only. Newlines are kept as explicit Newline items
|
||||
@@ -165,15 +169,6 @@ Escape { "\\" EscapeChar }
|
||||
// reliably detect newlines in the raw input stream.
|
||||
spaces { $[ \t]+ }
|
||||
|
||||
// Keywords take precedence over identifiers when they match fully
|
||||
// (e.g. "let" → CodeKeyword, "letter" → CodeIdent).
|
||||
CodeKeyword {
|
||||
"let" | "set" | "show" | "import" | "include" |
|
||||
"if" | "else" | "for" | "while" | "return" |
|
||||
"break" | "continue" | "in" | "as" |
|
||||
"and" | "or" | "not" | "context"
|
||||
}
|
||||
|
||||
// Boolean / null literals — distinct from keywords for highlighting.
|
||||
CodeBool { "true" | "false" | "none" | "auto" }
|
||||
|
||||
@@ -225,7 +220,9 @@ Escape { "\\" EscapeChar }
|
||||
// the post-keyword state with markup states where "_" starts Emphasis.
|
||||
// CodeIdent wins so '#set _name(...)' is tokenised correctly; in pure markup
|
||||
// states CodeIdent is not in the valid set so "_" still opens Emphasis.
|
||||
@precedence { CodeKeyword CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
|
||||
// CodeKeyword is now an external token (codeKeywordTokenizer) and therefore
|
||||
// not listed here — it uses a peek(-1)==='#' guard to stay out of markup.
|
||||
@precedence { CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
|
||||
}
|
||||
|
||||
@skip { spaces }
|
||||
|
||||
Reference in New Issue
Block a user