From d7ca7b194d03fb0f7fb708edd817c6b779db059e Mon Sep 17 00:00:00 2001
From: claude <claude@verso>
Date: Tue, 9 Jun 2026 14:57:21 +0000
Subject: [PATCH] feat(typst): parse show-rule bodies, let-value bindings, and
 content-block call args

Three grammar gaps caused large blocks of code to be unhighlighted:

1. KeywordExpr now accepts an optional keywordBody with two mutually
   exclusive forms: '#show sel: body' is parsed via ':', and
   '#let name = value' via '='.  callOrValue extends the subject to include
   CodeString so '#import "pkg"' highlights the path.

2. ContentBlock added to callSuffix so '#func("arg")[content]' and
   '#next-step("url")[...]' parse their trailing content block as code
   rather than falling back to markup.

3. Tokenizer: COLON added as a valid predecessor so identifiers (e.g. 'blue'
   in 'fill: blue') and keywords (e.g. 'set' in '#show link: set text(...)')
   are recognised after ':'.  EQUALS, introduced in the previous commit, is
   now also accepted as an identifier predecessor, so an identifier value in
   '#let name = value' is recognised.  The ident-chain backward scan now
   also skips whitespace between the preceding word and the '#' or ':' it
   tests for, enabling 'text' in '#show link: set text(...)' to trace back
   through 'set' and the intervening gap to ':'.  @precedence updated with
   CodeString, '[', ':' to resolve overlapping-token conflicts with
   MarkupContent in merged states.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../source-editor/lezer-typst/tokens.mjs      | 21 +++++++++++++------
 .../source-editor/lezer-typst/typst.grammar   | 18 ++++++++++------
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
index 65d3098f0d..61f0ee11c5 100644
--- a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
+++ b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs
@@ -285,8 +285,12 @@ export const mathContentTokenizer = new ExternalTokenizer(
 export const codeKeywordTokenizer = new ExternalTokenizer(
   (input, stack) => {
     if (!stack.canShift(CodeKeyword)) return
-    // Only fire right after '#'; any other predecessor means we are in body text.
-    if (input.peek(-1) !== HASH) return
+    // Valid positions: after '#' (normal #set, #show) or after ':' (show-body:
+    // '#show sel: set text(...)'), in either case past optional whitespace.
+    let back = -1
+    while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back--
+    const kwPrev = input.peek(back)
+    if (kwPrev !== HASH && kwPrev !== COLON) return
 
     // Peek ahead to read the full identifier without advancing.
     let len = 0
@@ -337,18 +341,23 @@ export const codeIdentTokenizer = new ExternalTokenizer(
     while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back--
     const prev = input.peek(back)
 
-    if (prev !== HASH && prev !== DOT && prev !== OPEN_PAREN && prev !== COMMA) {
-      // May be after a keyword like '#set' or '#show': scan back through the
-      // keyword word itself and check that '#' immediately precedes it.
+    if (prev !== HASH && prev !== DOT && prev !== OPEN_PAREN && prev !== COMMA && prev !== EQUALS && prev !== COLON) {
+      // May be after a keyword chain like '#set text' or (in show body) 'set body':
+      // scan back through the preceding identifier word, skip whitespace, and
+      // verify '#' or ':' precedes it.  Accepting ':' lets multi-word chains
+      // like '#show sel: set text' find ':' before 'set'.
       if (!isIdentTail(prev)) return
       let b = back
       while (isIdentTail(input.peek(b))) b--
-      if (input.peek(b) !== HASH) return
+      while (input.peek(b) === SPACE || input.peek(b) === TAB || input.peek(b) === NEWLINE) b--
+      const chainEnd = input.peek(b)
+      if (chainEnd !== HASH && chainEnd !== COLON) return
     }
 
     // In arg-delimiter positions ('(' or ',') we may emit CodeArgKey regardless
     // of canShift(CodeIdent) — LALR merging can suppress canShift(CodeIdent)
     // after a complex first argument (e.g. figure(table(...), caption: ...)).
+    // ':' and '=' are value positions, NOT arg-key positions.
     const couldBeArgKey = prev === OPEN_PAREN || prev === COMMA
     if (!couldBeIdent && !couldBeArgKey) return
 
diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar
index 259d93de0c..7e6ec2efe0 100644
--- a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar
+++ b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar
@@ -68,10 +68,12 @@ codeExprBody {
   ContentBlock
 }
 
-// CallExpr? covers '#set text(size: 12pt)', '#show heading: ...', etc.
-// The optional CallExpr is only shifted when the next token is CodeIdent,
-// so there is no shift/reduce conflict with other items that follow keywords.
-KeywordExpr { CodeKeyword CallExpr? }
+// callOrValue covers the subject of a keyword expression (#set text, #show link,
+// #import "pkg", #let name).  keywordBody is exclusive: ':' for show-rule bodies
+// and '=' for let-binding values (a keyword expression never has both).
+KeywordExpr { CodeKeyword callOrValue? keywordBody? }
+callOrValue { CallExpr | CodeString }
+keywordBody { ":" codeExprBody | "=" codeValue }
 AtomExpr    { CodeBool    }
 
 // CallExpr allows zero suffixes — used at top level (#x) and after keywords
@@ -83,7 +85,8 @@ CallExpr { CodeIdent callSuffix* }
 FuncExpr { CodeIdent callSuffix+ }
 callSuffix {
   CodeArgs |
-  "." CodeIdent
+  "." CodeIdent |
+  ContentBlock
 }
 
 CodeArgs    { "(" codeArgList? ")" }
@@ -235,7 +238,10 @@ Escape { "\\" EscapeChar }
   //   by MarkupContent (redundant since '_' is in MarkupContent's exclusion
   //   set, but kept for clarity).
   // CodeIdent and StrongText/EmphText are now external tokens — not listed.
-  @precedence { CodeBool EscapeChar "(" "." "]" "_" spaces MarkupContent }
+  // "["  > MarkupContent: ContentBlock callSuffix wins in merged code/markup states.
+  // CodeString > MarkupContent: '"' starts a string literal after a keyword.
+  // ":"  > MarkupContent: keywordBody ':' wins over markup colon in code states.
+  @precedence { CodeBool EscapeChar CodeString "[" ":" "(" "." "]" "_" spaces MarkupContent }
 }
 
 @skip { spaces }