Fix Typst: bold/italic rendering and keyword false-highlights in body text

Add .tok-strong and .tok-emphasis CSS to the static editor theme so bold/italic markup actually renders visually.

Move CodeKeyword from @tokens to an external tokenizer (codeKeywordTokenizer) that only fires when the preceding character is '#' (input.peek(-1) equals the '#' char code). LALR state-merging causes code-mode states to be reachable in markup positions, making common English words like "in", "for", "while", "return" trigger CodeKeyword highlighting in body text. The '#' guard ensures keywords only fire immediately after the '#' sigil, never in prose.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 20:20:02 +00:00
parent f5a94c0ced
commit f976c5ba92
3 changed files with 63 additions and 10 deletions
@@ -203,6 +203,9 @@ const staticTheme = EditorView.theme({
    alignItems: 'center',
    fontWeight: 'normal',
  },
+  // Bold and italic markup (e.g. *strong* _emphasis_ in Typst and Markdown)
+  '.tok-strong': { fontWeight: 'bold' },
+  '.tok-emphasis': { fontStyle: 'italic' },
  '.cm-selectionLayer': {
    zIndex: -10,
  },
@@ -12,6 +12,7 @@ import {
  BlockCommentBody,
  LineCommentContent,
  MathContent,
+  CodeKeyword,
 } from './typst.terms.mjs'

 const BACKTICK  = 96  // `
@@ -24,6 +25,14 @@ const TAB       =  9  // \t
 const DOLLAR    = 36  // $
 const OPEN_BRACE  = 123 // {
 const CLOSE_BRACE = 125 // }
+const HASH      = 35  // #
+
+const KEYWORDS = new Set([
+  'let', 'set', 'show', 'import', 'include',
+  'if', 'else', 'for', 'while', 'return',
+  'break', 'continue', 'in', 'as',
+  'and', 'or', 'not', 'context',
+])

 // ── headingTokenizer ────────────────────────────────────────────────────
 // Emits HeadingMark — the "=+" prefix plus the trailing whitespace.
@@ -243,3 +252,47 @@ export const mathContentTokenizer = new ExternalTokenizer(
  },
  { contextual: false }
 )
+
+// ── codeKeywordTokenizer ─────────────────────────────────────────────────
+// Emits CodeKeyword (let, set, for, while, in, …) ONLY when the preceding
+// character is '#', i.e. we are immediately after the '#' sigil in a CodeExpr.
+//
+// The peek(-1)==='#' guard is what prevents LALR state-merging from causing
+// these tokens to fire in body-text positions.  Common English words like
+// "in", "for", "while", "return" appear in markup paragraphs; without the
+// guard they would be highlighted as keywords due to LALR-merged states where
+// CodeKeyword is technically in the valid set.
+export const codeKeywordTokenizer = new ExternalTokenizer(
+  (input, stack) => {
+    if (!stack.canShift(CodeKeyword)) return
+    // Only fire right after '#'; any other predecessor means we are in body text.
+    if (input.peek(-1) !== HASH) return
+
+    // Peek ahead to read the full identifier without advancing.
+    let len = 0
+    while (true) {
+      const ch = input.peek(len)
+      if ((ch >= 65 && ch <= 90) ||  // A–Z
+          (ch >= 97 && ch <= 122) ||  // a–z
+          (ch >= 48 && ch <= 57) ||   // 0–9
+          ch === 95 ||                 // _
+          ch === 45) {                 // -
+        len++
+      } else {
+        break
+      }
+    }
+
+    if (len === 0) return
+
+    const chars = []
+    for (let i = 0; i < len; i++) chars.push(input.peek(i))
+    const word = String.fromCharCode(...chars)
+
+    if (!KEYWORDS.has(word)) return
+
+    for (let i = 0; i < len; i++) input.advance()
+    input.acceptToken(CodeKeyword)
+  },
+  { contextual: true }
+)
@@ -158,6 +158,10 @@ Escape { "\\" EscapeChar }
  MathContent
 }

+@external tokens codeKeywordTokenizer from "./tokens.mjs" {
+  CodeKeyword
+}
+
 // ── Regular tokens ────────────────────────────────────────────────────────
@tokens {
  // Horizontal whitespace only.  Newlines are kept as explicit Newline items
@@ -165,15 +169,6 @@ Escape { "\\" EscapeChar }
  // reliably detect newlines in the raw input stream.
  spaces { $[ \t]+ }

-  // Keywords take precedence over identifiers when they match fully
-  // (e.g. "let" → CodeKeyword, "letter" → CodeIdent).
-  CodeKeyword {
-    "let"      | "set"      | "show"     | "import"   | "include" |
-    "if"       | "else"     | "for"      | "while"    | "return"  |
-    "break"    | "continue" | "in"       | "as"       |
-    "and"      | "or"       | "not"      | "context"
-  }
-
  // Boolean / null literals — distinct from keywords for highlighting.
  CodeBool { "true" | "false" | "none" | "auto" }

@@ -225,7 +220,9 @@ Escape { "\\" EscapeChar }
  // the post-keyword state with markup states where "_" starts Emphasis.
  // CodeIdent wins so '#set _name(...)' is tokenised correctly; in pure markup
  // states CodeIdent is not in the valid set so "_" still opens Emphasis.
-  @precedence { CodeKeyword CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
+  // CodeKeyword is now an external token (codeKeywordTokenizer) and therefore
+  // not listed here — it uses a peek(-1)==='#' guard to stay out of markup.
+  @precedence { CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
 }

@skip { spaces }