From 1dcd6e24f4d870db971214a86995e45e382f8804 Mon Sep 17 00:00:00 2001
From: claude <claude@verso>
Date: Mon, 8 Jun 2026 07:56:20 +0000
Subject: [PATCH] lezer-typst: convert LineCommentContent and MathContent to
 external tokenizers

Both tokens are "read until delimiter" catchalls that match almost every
non-newline character, causing buildTokenGroups conflicts with every other
literal token in LALR-merged states.  Moving them to ExternalTokenizer (the
same pattern already used for HeadingTitle, RawBlockBody, etc.) makes them
context-isolated: the LR state machine only calls them when those tokens are
actually valid, so they never participate in the static token-group overlap
check.

Also exclude '<' from StrongText/EmphText so Label ('<' LabelName '>') is
recognised inside strong/emphasis spans rather than being consumed as plain
text.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../source-editor/lezer-typst/tokens.mjs      | 36 +++++++++++++++++++
 .../source-editor/lezer-typst/typst.grammar   | 33 +++++++++--------
 2 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
index 0ded687c9a..5161ad47e8 100644
--- a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
+++ b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
@@ -10,6 +10,8 @@ import {
   RawInlineContent,
   CodeBlockBody,
   BlockCommentBody,
+  LineCommentContent,
+  MathContent,
 } from './typst.terms.mjs'
 
 const BACKTICK  = 96  // `
@@ -19,6 +21,7 @@ const NEWLINE   = 10  // \n
 const EQUALS    = 61  // =
 const SPACE     = 32  //
 const TAB       =  9  // \t
+const DOLLAR    = 36  // $
 const OPEN_BRACE  = 123 // {
 const CLOSE_BRACE = 125 // }
 
@@ -191,3 +194,36 @@ export const blockCommentTokenizer = new ExternalTokenizer(
   },
   { contextual: false }
 )
+
+// ── lineCommentContentTokenizer ─────────────────────────────────────────
+// Emits LineCommentContent — everything from the current position to EOL.
+// External rather than a @tokens rule because ![\n]+ conflicts with every
+// non-newline literal token in LALR-merged states.  External tokenizers are
+// context-isolated: only called when the LR state expects this token.
+export const lineCommentContentTokenizer = new ExternalTokenizer(
+  (input, _stack) => {
+    let hasContent = false
+    while (input.next !== -1 && input.next !== NEWLINE) {
+      input.advance()
+      hasContent = true
+    }
+    if (hasContent) input.acceptToken(LineCommentContent)
+  },
+  { contextual: false }
+)
+
+// ── mathContentTokenizer ────────────────────────────────────────────────
+// Emits MathContent — everything between the $...$ delimiters (no newlines).
+// External rather than a @tokens rule for the same reason as LineCommentContent:
+// ![$\n]+ overlaps with spaces, '<', '@', and other literals in merged states.
+export const mathContentTokenizer = new ExternalTokenizer(
+  (input, _stack) => {
+    let hasContent = false
+    while (input.next !== -1 && input.next !== DOLLAR && input.next !== NEWLINE) {
+      input.advance()
+      hasContent = true
+    }
+    if (hasContent) input.acceptToken(MathContent)
+  },
+  { contextual: false }
+)
diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar
index ccfec7335b..7404ac9b00 100644
--- a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar
+++ b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar
@@ -148,6 +148,14 @@ Escape { "\\" EscapeChar }
   BlockCommentBody
 }
 
+@external tokens lineCommentContentTokenizer from "./tokens.mjs" {
+  LineCommentContent  // rest of the line; tokenizer stops before '\n' or EOF
+}
+
+@external tokens mathContentTokenizer from "./tokens.mjs" {
+  MathContent  // text inside $...$; tokenizer stops before '$', '\n' or EOF
+}
+
 // ── Regular tokens ────────────────────────────────────────────────────────
 @tokens {
   // Horizontal whitespace only.  Newlines are kept as explicit Newline items
@@ -181,16 +189,14 @@ Escape { "\\" EscapeChar }
     ("pt" | "mm" | "cm" | "in" | "em" | "rem" | "fr" | "deg" | "rad" | "%")?
   }
 
-  // Comment content — everything to end of line.
-  LineCommentContent { ![\n]+ }
-
-  // Math content — everything between the $ delimiters (no crossing newlines).
-  MathContent { ![$\n]+ }
-
   // Text tokens for markup contexts; each excludes its own delimiters.
-  // HeadingText is gone: HeadingTitle is now an external token (see above).
-  StrongText   { ![\n*$#`@\\]+    }
-  EmphText     { ![\n_$#`@\\]+    }
+  // HeadingText, LineCommentContent, and MathContent are external tokens
+  // (see above) — broad "read-to-delimiter" tokens that would otherwise
+  // conflict with every other literal token in LALR-merged states.
+  // '<' is excluded from StrongText/EmphText so that Label ('<' LabelName '>')
+  // is recognised inside strong/emphasis rather than consumed as plain text.
+  StrongText   { ![\n*$#`<@\\]+   }
+  EmphText     { ![\n_$#`<@\\]+   }
 
   // Regular markup: excludes all special-character starters plus whitespace
   // (whitespace is handled by @skip).  The '/' is excluded so that '//' and
@@ -210,11 +216,10 @@ Escape { "\\" EscapeChar }
   // Resolve ambiguities: more-specific tokens win over broader catch-alls.
   // EscapeChar > spaces: after '\', EscapeChar must win over the skip token
   //   (both match \t; without this, '\t' would be mis-tokenized).
-  // "(" > "." > text tokens: after '#' CodeIdent, callSuffix delimiters must win
-  //   over MarkupContent/StrongText/EmphText in the LALR-merged state.
-  // "]" > LineCommentContent: inside #[...], ']' closes the ContentBlock even
-  //   if it appears after '//' (comment does not "protect" the bracket).
-  @precedence { CodeKeyword CodeBool CodeIdent EscapeChar "(" "." "]" spaces MarkupContent StrongText EmphText LineCommentContent }
+  // "(" > "." > "]" > text tokens: after '#' CodeIdent, callSuffix delimiters
+  //   must win over MarkupContent/StrongText/EmphText in merged states.
+  // LineCommentContent and MathContent are external tokens — not listed here.
+  @precedence { CodeKeyword CodeBool CodeIdent EscapeChar "(" "." "]" spaces MarkupContent StrongText EmphText }
 }
 
 @skip { spaces }