Fix Typst: bold/italic rendering and keyword false-highlights in body text
Build and Deploy Verso / deploy (push) Successful in 14m15s
Build and Deploy Verso / deploy (push) Successful in 14m15s
Add .tok-strong and .tok-emphasis CSS to the static editor theme so bold/italic markup actually renders visually. Move CodeKeyword from @tokens to an external tokenizer (codeKeywordTokenizer) with a peek(-1)==='#' guard. LALR state-merging causes code-mode states to be reachable in markup positions, making common English words like "in", "for", "while", "return" trigger CodeKeyword highlighting in body text. The '#' guard ensures keywords only fire immediately after the '#' sigil, never in prose. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -203,6 +203,9 @@ const staticTheme = EditorView.theme({
|
|||||||
alignItems: 'center',
|
alignItems: 'center',
|
||||||
fontWeight: 'normal',
|
fontWeight: 'normal',
|
||||||
},
|
},
|
||||||
|
// Bold and italic markup (e.g. *strong* _emphasis_ in Typst and Markdown)
|
||||||
|
'.tok-strong': { fontWeight: 'bold' },
|
||||||
|
'.tok-emphasis': { fontStyle: 'italic' },
|
||||||
'.cm-selectionLayer': {
|
'.cm-selectionLayer': {
|
||||||
zIndex: -10,
|
zIndex: -10,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import {
|
|||||||
BlockCommentBody,
|
BlockCommentBody,
|
||||||
LineCommentContent,
|
LineCommentContent,
|
||||||
MathContent,
|
MathContent,
|
||||||
|
CodeKeyword,
|
||||||
} from './typst.terms.mjs'
|
} from './typst.terms.mjs'
|
||||||
|
|
||||||
const BACKTICK = 96 // `
|
const BACKTICK = 96 // `
|
||||||
@@ -24,6 +25,14 @@ const TAB = 9 // \t
|
|||||||
const DOLLAR = 36 // $
|
const DOLLAR = 36 // $
|
||||||
const OPEN_BRACE = 123 // {
|
const OPEN_BRACE = 123 // {
|
||||||
const CLOSE_BRACE = 125 // }
|
const CLOSE_BRACE = 125 // }
|
||||||
|
const HASH = 35 // #
|
||||||
|
|
||||||
|
// Reserved words that codeKeywordTokenizer may emit as CodeKeyword.
// Grouped by role purely for readability; membership is all that matters.
const KEYWORDS = new Set([
  // bindings, rules and module loading
  'let',
  'set',
  'show',
  'import',
  'include',
  // control flow
  'if',
  'else',
  'for',
  'while',
  'return',
  'break',
  'continue',
  // clause words
  'in',
  'as',
  // word operators and context expressions
  'and',
  'or',
  'not',
  'context',
])
|
||||||
|
|
||||||
// ── headingTokenizer ────────────────────────────────────────────────────
|
// ── headingTokenizer ────────────────────────────────────────────────────
|
||||||
// Emits HeadingMark — the "=+" prefix plus the trailing whitespace.
|
// Emits HeadingMark — the "=+" prefix plus the trailing whitespace.
|
||||||
@@ -243,3 +252,47 @@ export const mathContentTokenizer = new ExternalTokenizer(
|
|||||||
},
|
},
|
||||||
{ contextual: false }
|
{ contextual: false }
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ── codeKeywordTokenizer ─────────────────────────────────────────────────
// Emits CodeKeyword (let, set, for, while, in, …) ONLY when the preceding
// character is '#', i.e. we are immediately after the '#' sigil in a CodeExpr.
//
// The peek(-1) === '#' guard is what prevents LALR state-merging from causing
// these tokens to fire in body-text positions. Common English words like
// "in", "for", "while", "return" appear in markup paragraphs; without the
// guard they would be highlighted as keywords due to LALR-merged states where
// CodeKeyword is technically in the valid set.
//
// NOTE(review): because of the guard, keywords that normally follow
// whitespace inside a code expression ("in", "else", "as", "and", "or",
// "not") are only emitted when written directly after '#'. Confirm the
// grammar does not need CodeKeyword in those positions.

// Length of the longest entry in KEYWORDS. An identifier run longer than this
// can never be a keyword, so the scan below gives up at that point.
const CODE_KEYWORD_MAX_LENGTH = Math.max(
  ...Array.from(KEYWORDS, (keyword) => keyword.length)
)

// True for the code units the tokenizer treats as part of an identifier:
// ASCII letters, ASCII digits, '_' and '-'.
const isCodeKeywordChar = (ch) =>
  (ch >= 65 && ch <= 90) ||   // A–Z
  (ch >= 97 && ch <= 122) ||  // a–z
  (ch >= 48 && ch <= 57) ||   // 0–9
  ch === 95 ||                // _
  ch === 45                   // -

export const codeKeywordTokenizer = new ExternalTokenizer(
  (input, stack) => {
    if (!stack.canShift(CodeKeyword)) return
    // Only fire right after '#'; any other predecessor means we are in body text.
    if (input.peek(-1) !== HASH) return

    // Read the identifier by peeking only, so nothing is consumed unless the
    // whole run turns out to be a keyword ("let" matches, "letter" does not).
    let word = ''
    for (let offset = 0; ; offset++) {
      const ch = input.peek(offset)
      if (!isCodeKeywordChar(ch)) break
      // Longer than every keyword: cannot match, and stopping here keeps the
      // lookahead bounded no matter how long the identifier is.
      if (offset === CODE_KEYWORD_MAX_LENGTH) return
      word += String.fromCharCode(ch)
    }

    // Also covers the empty run, since '' is not a keyword.
    if (!KEYWORDS.has(word)) return

    for (let i = 0; i < word.length; i++) input.advance()
    input.acceptToken(CodeKeyword)
  },
  { contextual: true }
)
|
||||||
|
|||||||
@@ -158,6 +158,10 @@ Escape { "\\" EscapeChar }
|
|||||||
MathContent
|
MathContent
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@external tokens codeKeywordTokenizer from "./tokens.mjs" {
|
||||||
|
CodeKeyword
|
||||||
|
}
|
||||||
|
|
||||||
// ── Regular tokens ────────────────────────────────────────────────────────
|
// ── Regular tokens ────────────────────────────────────────────────────────
|
||||||
@tokens {
|
@tokens {
|
||||||
// Horizontal whitespace only. Newlines are kept as explicit Newline items
|
// Horizontal whitespace only. Newlines are kept as explicit Newline items
|
||||||
@@ -165,15 +169,6 @@ Escape { "\\" EscapeChar }
|
|||||||
// reliably detect newlines in the raw input stream.
|
// reliably detect newlines in the raw input stream.
|
||||||
spaces { $[ \t]+ }
|
spaces { $[ \t]+ }
|
||||||
|
|
||||||
// Keywords take precedence over identifiers when they match fully
|
|
||||||
// (e.g. "let" → CodeKeyword, "letter" → CodeIdent).
|
|
||||||
CodeKeyword {
|
|
||||||
"let" | "set" | "show" | "import" | "include" |
|
|
||||||
"if" | "else" | "for" | "while" | "return" |
|
|
||||||
"break" | "continue" | "in" | "as" |
|
|
||||||
"and" | "or" | "not" | "context"
|
|
||||||
}
|
|
||||||
|
|
||||||
// Boolean / null literals — distinct from keywords for highlighting.
|
// Boolean / null literals — distinct from keywords for highlighting.
|
||||||
CodeBool { "true" | "false" | "none" | "auto" }
|
CodeBool { "true" | "false" | "none" | "auto" }
|
||||||
|
|
||||||
@@ -225,7 +220,9 @@ Escape { "\\" EscapeChar }
|
|||||||
// the post-keyword state with markup states where "_" starts Emphasis.
|
// the post-keyword state with markup states where "_" starts Emphasis.
|
||||||
// CodeIdent wins so '#set _name(...)' is tokenised correctly; in pure markup
|
// CodeIdent wins so '#set _name(...)' is tokenised correctly; in pure markup
|
||||||
// states CodeIdent is not in the valid set so "_" still opens Emphasis.
|
// states CodeIdent is not in the valid set so "_" still opens Emphasis.
|
||||||
@precedence { CodeKeyword CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
|
// CodeKeyword is now an external token (codeKeywordTokenizer) and therefore
|
||||||
|
// not listed here — it uses a peek(-1)==='#' guard to stay out of markup.
|
||||||
|
@precedence { CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
|
||||||
}
|
}
|
||||||
|
|
||||||
@skip { spaces }
|
@skip { spaces }
|
||||||
|
|||||||
Reference in New Issue
Block a user