diff --git a/services/web/frontend/js/features/source-editor/extensions/theme.ts b/services/web/frontend/js/features/source-editor/extensions/theme.ts
index a90c0dcdeb..be3063a478 100644
--- a/services/web/frontend/js/features/source-editor/extensions/theme.ts
+++ b/services/web/frontend/js/features/source-editor/extensions/theme.ts
@@ -203,6 +203,9 @@ const staticTheme = EditorView.theme({
     alignItems: 'center',
     fontWeight: 'normal',
   },
+  // Bold and italic markup (e.g. *strong* _emphasis_ in Typst and Markdown)
+  '.tok-strong': { fontWeight: 'bold' },
+  '.tok-emphasis': { fontStyle: 'italic' },
   '.cm-selectionLayer': {
     zIndex: -10,
   },
diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
index 186b9945c4..90fb687339 100644
--- a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
+++ b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
@@ -12,6 +12,7 @@ import {
   BlockCommentBody,
   LineCommentContent,
   MathContent,
+  CodeKeyword,
 } from './typst.terms.mjs'
 
 const BACKTICK = 96 // `
@@ -24,6 +25,14 @@ const TAB = 9 // \t
 const DOLLAR = 36 // $
 const OPEN_BRACE = 123 // {
 const CLOSE_BRACE = 125 // }
+const HASH = 35 // #
+
+const KEYWORDS = new Set([
+  'let', 'set', 'show', 'import', 'include',
+  'if', 'else', 'for', 'while', 'return',
+  'break', 'continue', 'in', 'as',
+  'and', 'or', 'not', 'context',
+])
 
 // ── headingTokenizer ────────────────────────────────────────────────────
 // Emits HeadingMark — the "=+" prefix plus the trailing whitespace.
@@ -243,3 +252,59 @@ export const mathContentTokenizer = new ExternalTokenizer(
   },
   { contextual: false }
 )
+
+// ── codeKeywordTokenizer ─────────────────────────────────────────────────
+// Emits CodeKeyword for a word from KEYWORDS, but ONLY when the character
+// immediately before it is '#', i.e. directly after the '#' sigil of a
+// CodeExpr.
+//
+// The peek(-1) === HASH guard is what prevents LALR state-merging from
+// causing these tokens to fire in body-text positions. Common English words
+// like "in", "for", "while", "return" appear in markup paragraphs; without
+// the guard they would be highlighted as keywords due to LALR-merged states
+// where CodeKeyword is technically in the valid set.
+//
+// NOTE(review): because of that guard, a keyword that does not directly
+// follow '#' (e.g. the "in" of "#for x in xs", or "else"/"as"/"and"/"or")
+// is never emitted by this tokenizer — confirm that is intended.
+
+// Length of the longest keyword; the scan below never needs to look further.
+const MAX_KEYWORD_LENGTH = Math.max(
+  ...Array.from(KEYWORDS, keyword => keyword.length)
+)
+
+// True for the code units this tokenizer treats as part of a word:
+// ASCII letters, digits, '_' and '-'.
+const isWordChar = ch =>
+  (ch >= 65 && ch <= 90) || // A–Z
+  (ch >= 97 && ch <= 122) || // a–z
+  (ch >= 48 && ch <= 57) || // 0–9
+  ch === 95 || // _
+  ch === 45 // -
+
+export const codeKeywordTokenizer = new ExternalTokenizer(
+  (input, stack) => {
+    if (!stack.canShift(CodeKeyword)) return
+    // Only fire right after '#'; any other predecessor means body text.
+    if (input.peek(-1) !== HASH) return
+
+    // Read the word by peeking, without advancing, so nothing is consumed
+    // unless it turns out to be a keyword.
+    let word = ''
+    for (;;) {
+      const ch = input.peek(word.length)
+      if (!isWordChar(ch)) break
+      // One more word character than the longest keyword: cannot match, so
+      // stop instead of peeking through the rest of a long identifier.
+      if (word.length === MAX_KEYWORD_LENGTH) return
+      word += String.fromCharCode(ch)
+    }
+
+    // Whole-word match only ("letter" is not "let"); also rejects ''.
+    if (!KEYWORDS.has(word)) return
+
+    for (let i = 0; i < word.length; i++) input.advance()
+    input.acceptToken(CodeKeyword)
+  },
+  { contextual: true }
+)
diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar
index 69d2e8434f..9c51eee536 100644
--- a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar
+++ b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar
@@ -158,6 +158,10 @@ Escape { "\\" EscapeChar }
   MathContent
 }
 
+@external tokens codeKeywordTokenizer from "./tokens.mjs" {
+  CodeKeyword
+}
+
 // ── Regular tokens ────────────────────────────────────────────────────────
 @tokens {
   // Horizontal whitespace only. Newlines are kept as explicit Newline items
@@ -165,15 +169,6 @@ Escape { "\\" EscapeChar }
   // reliably detect newlines in the raw input stream.
   spaces { $[ \t]+ }
 
-  // Keywords take precedence over identifiers when they match fully
-  // (e.g. "let" → CodeKeyword, "letter" → CodeIdent).
-  CodeKeyword {
-    "let" | "set" | "show" | "import" | "include" |
-    "if" | "else" | "for" | "while" | "return" |
-    "break" | "continue" | "in" | "as" |
-    "and" | "or" | "not" | "context"
-  }
-
   // Boolean / null literals — distinct from keywords for highlighting.
   CodeBool { "true" | "false" | "none" | "auto" }
 
@@ -225,7 +220,9 @@ Escape { "\\" EscapeChar }
   // the post-keyword state with markup states where "_" starts Emphasis.
   // CodeIdent wins so '#set _name(...)' is tokenised correctly; in pure markup
   // states CodeIdent is not in the valid set so "_" still opens Emphasis.
-  @precedence { CodeKeyword CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
+  // CodeKeyword is now an external token (codeKeywordTokenizer) and therefore
+  // not listed here — it uses a peek(-1)==='#' guard to stay out of markup.
+  @precedence { CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
 }
 
 @skip { spaces }