Fix Typst: bold/italic rendering and keyword false-highlights in body text

Add .tok-strong and .tok-emphasis CSS to the static editor theme so bold/italic markup actually renders visually. Move CodeKeyword from @tokens to an external tokenizer (codeKeywordTokenizer) with a peek(-1)==='#' guard. LALR state-merging causes code-mode states to be reachable in markup positions, making common English words like "in", "for", "while", "return" trigger CodeKeyword highlighting in body text. The '#' guard ensures keywords only fire immediately after the '#' sigil, never in prose. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 20:20:02 +00:00
parent f5a94c0ced
commit f976c5ba92
3 changed files with 63 additions and 10 deletions
@@ -203,6 +203,9 @@ const staticTheme = EditorView.theme({
    alignItems: 'center',
    fontWeight: 'normal',
  },
  // Bold and italic markup (e.g. *strong* _emphasis_ in Typst and Markdown)
  '.tok-strong': { fontWeight: 'bold' },
  '.tok-emphasis': { fontStyle: 'italic' },
  '.cm-selectionLayer': {
    zIndex: -10,
  },
@@ -12,6 +12,7 @@ import {
  BlockCommentBody,
  LineCommentContent,
  MathContent,
  CodeKeyword,
 } from './typst.terms.mjs'
 const BACKTICK  = 96  // `
@@ -24,6 +25,14 @@ const TAB       =  9  // \t
 const DOLLAR    = 36  // $
 const OPEN_BRACE  = 123 // {
 const CLOSE_BRACE = 125 // }
 const HASH      = 35  // #
 // Words reported as CodeKeyword.  Lookup is by exact match against the whole
 // identifier, so "let" is a keyword while "letter" is not.
 const KEYWORDS = new Set([
  'let',
  'set',
  'show',
  'import',
  'include',
  'if',
  'else',
  'for',
  'while',
  'return',
  'break',
  'continue',
  'in',
  'as',
  'and',
  'or',
  'not',
  'context',
 ])
 // ── headingTokenizer ────────────────────────────────────────────────────
 // Emits HeadingMark — the "=+" prefix plus the trailing whitespace.
@@ -243,3 +252,47 @@ export const mathContentTokenizer = new ExternalTokenizer(
  },
  { contextual: false }
 )
 // ── codeKeywordTokenizer ─────────────────────────────────────────────────
 // Emits CodeKeyword (let, set, for, while, in, …) ONLY when the preceding
 // character is '#', i.e. the word sits immediately after the '#' sigil.
 //
 // The peek(-1) === HASH guard is what prevents LALR state-merging from
 // causing these tokens to fire in body-text positions.  Common English words
 // like "in", "for", "while", "return" appear in markup paragraphs; without
 // the guard they would be highlighted as keywords wherever CodeKeyword is
 // technically in the valid set of a merged state.
 //
 // NOTE(review): the guard also rejects keywords that follow anything other
 // than '#' inside code (e.g. the "in" of "#for x in xs", or "else" after a
 // block) — confirm that this is intended or handled elsewhere.
 //
 // Identifier characters are matched as ASCII only (A–Z, a–z, 0–9, "_", "-").
 // A keyword must span the whole identifier: "let" matches, "letter" and
 // "let-x" do not.

 // Length of the longest keyword.  An identifier longer than this can never
 // be a keyword, so the scan below stops there instead of peeking to the end
 // of an arbitrarily long identifier (far lookahead makes the token depend on
 // distant input and hurts incremental re-parsing).
 const MAX_CODE_KEYWORD_LENGTH = Math.max(0, ...Array.from(KEYWORDS, (kw) => kw.length))

 export const codeKeywordTokenizer = new ExternalTokenizer(
  (input, stack) => {
    if (!stack.canShift(CodeKeyword)) return
    // Only fire right after '#'.  A one-character look-behind is enough.
    if (input.peek(-1) !== HASH) return
    // Read the identifier in a single pass without advancing the stream.
    let word = ''
    for (;;) {
      const ch = input.peek(word.length)  // -1 at end of input → not an ident char
      const isIdentChar =
        (ch >= 65 && ch <= 90) ||   // A–Z
        (ch >= 97 && ch <= 122) ||  // a–z
        (ch >= 48 && ch <= 57) ||   // 0–9
        ch === 95 ||                // _
        ch === 45                   // -
      if (!isIdentChar) break
      // One more identifier character than the longest keyword: cannot match.
      if (word.length === MAX_CODE_KEYWORD_LENGTH) return
      word += String.fromCharCode(ch)
    }
    // Also covers the empty word (no identifier at this position).
    if (!KEYWORDS.has(word)) return
    // Consume exactly the keyword, then report it.
    for (let i = 0; i < word.length; i++) input.advance()
    input.acceptToken(CodeKeyword)
  },
  { contextual: true }
 )
@@ -158,6 +158,10 @@ Escape { "\\" EscapeChar }
  MathContent
 }
// CodeKeyword is produced by the external tokenizer codeKeywordTokenizer
// (exported from ./tokens.mjs) rather than by a rule in @tokens.  That
// tokenizer only emits the token when the previous character is '#', which
// keeps words such as "in" or "for" in markup text from being tokenised as
// keywords.
@external tokens codeKeywordTokenizer from "./tokens.mjs" {
  CodeKeyword
 }
 // ── Regular tokens ────────────────────────────────────────────────────────
@tokens {
  // Horizontal whitespace only.  Newlines are kept as explicit Newline items
@@ -165,15 +169,6 @@ Escape { "\\" EscapeChar }
  // reliably detect newlines in the raw input stream.
  spaces { $[ \t]+ }
  // Keywords take precedence over identifiers when they match fully
  // (e.g. "let" → CodeKeyword, "letter" → CodeIdent).
  CodeKeyword {
    "let"      | "set"      | "show"     | "import"   | "include" |
    "if"       | "else"     | "for"      | "while"    | "return"  |
    "break"    | "continue" | "in"       | "as"       |
    "and"      | "or"       | "not"      | "context"
  }
  // Boolean / null literals — distinct from keywords for highlighting.
  CodeBool { "true" | "false" | "none" | "auto" }
@@ -225,7 +220,9 @@ Escape { "\\" EscapeChar }
  // the post-keyword state with markup states where "_" starts Emphasis.
  // CodeIdent wins so '#set _name(...)' is tokenised correctly; in pure markup
  // states CodeIdent is not in the valid set so "_" still opens Emphasis.
-  @precedence { CodeKeyword CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
+  // CodeKeyword is now an external token (codeKeywordTokenizer) and therefore
  // not listed here — it uses a peek(-1)==='#' guard to stay out of markup.
  @precedence { CodeBool CodeIdent EscapeChar "(" "." "]" "_" spaces MarkupContent StrongText EmphText }
 }
@skip { spaces }