From d7ca7b194d03fb0f7fb708edd817c6b779db059e Mon Sep 17 00:00:00 2001 From: claude Date: Tue, 9 Jun 2026 14:57:21 +0000 Subject: [PATCH] feat(typst): parse show-rule bodies, let-value bindings, and content-block call args Three grammar gaps caused large blocks of code to be unhighlighted: 1. KeywordExpr now accepts an optional keywordBody with two mutually exclusive forms: '#show sel: body' is parsed via ':', and '#let name = value' via '='. callOrValue extends the subject to include CodeString so '#import "pkg"' highlights the path. 2. ContentBlock added to callSuffix so '#func("arg")[content]' and '#next-step("url")[...]' parse their trailing content block as code rather than falling back to markup. 3. Tokenizer: COLON added as a valid predecessor so identifiers (e.g. 'blue' in 'fill: blue') and keywords (e.g. 'set' in '#show link: set text(...)') are recognised after ':'. The identifier tokenizer's predecessor check also accepts EQUALS, so the value in '#let name = value' is recognised after '='. The ident-chain backward scan now also skips whitespace before testing for '#' or ':', enabling 'text' in '#show sel: set text' to trace back through 'set' and the whitespace before it to ':'. @precedence updated with CodeString, '[', ':' to resolve overlapping-token conflicts with MarkupContent in merged states. 
Co-Authored-By: Claude Sonnet 4.6 --- .../source-editor/lezer-typst/tokens.mjs | 21 +++++++++++++------ .../source-editor/lezer-typst/typst.grammar | 18 ++++++++++------ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs index 65d3098f0d..61f0ee11c5 100644 --- a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs +++ b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs @@ -285,8 +285,12 @@ export const mathContentTokenizer = new ExternalTokenizer( export const codeKeywordTokenizer = new ExternalTokenizer( (input, stack) => { if (!stack.canShift(CodeKeyword)) return - // Only fire right after '#'; any other predecessor means we are in body text. - if (input.peek(-1) !== HASH) return + // Valid positions: immediately after '#' (normal #set, #show) or after ':' + // (show-body: '#show sel: set text(...)'). Walk back past optional whitespace. + let back = -1 + while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back-- + const kwPrev = input.peek(back) + if (kwPrev !== HASH && kwPrev !== COLON) return // Peek ahead to read the full identifier without advancing. let len = 0 @@ -337,18 +341,23 @@ export const codeIdentTokenizer = new ExternalTokenizer( while (input.peek(back) === SPACE || input.peek(back) === TAB || input.peek(back) === NEWLINE) back-- const prev = input.peek(back) - if (prev !== HASH && prev !== DOT && prev !== OPEN_PAREN && prev !== COMMA) { - // May be after a keyword like '#set' or '#show': scan back through the - // keyword word itself and check that '#' immediately precedes it. 
+ if (prev !== HASH && prev !== DOT && prev !== OPEN_PAREN && prev !== COMMA && prev !== EQUALS && prev !== COLON) { + // May be after a keyword chain like '#set text' or (in show body) 'set body': + // scan back through the preceding identifier word, skip whitespace, and + // verify '#' or ':' precedes it. Accepting ':' lets multi-word chains + // like '#show sel: set text' find ':' before 'set'. if (!isIdentTail(prev)) return let b = back while (isIdentTail(input.peek(b))) b-- - if (input.peek(b) !== HASH) return + while (input.peek(b) === SPACE || input.peek(b) === TAB || input.peek(b) === NEWLINE) b-- + const chainEnd = input.peek(b) + if (chainEnd !== HASH && chainEnd !== COLON) return } // In arg-delimiter positions ('(' or ',') we may emit CodeArgKey regardless // of canShift(CodeIdent) — LALR merging can suppress canShift(CodeIdent) // after a complex first argument (e.g. figure(table(...), caption: ...)). + // ':' and '=' are value positions, NOT arg-key positions. const couldBeArgKey = prev === OPEN_PAREN || prev === COMMA if (!couldBeIdent && !couldBeArgKey) return diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar index 259d93de0c..7e6ec2efe0 100644 --- a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar +++ b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar @@ -68,10 +68,12 @@ codeExprBody { ContentBlock } -// CallExpr? covers '#set text(size: 12pt)', '#show heading: ...', etc. -// The optional CallExpr is only shifted when the next token is CodeIdent, -// so there is no shift/reduce conflict with other items that follow keywords. -KeywordExpr { CodeKeyword CallExpr? } +// callOrValue covers the subject of a keyword expression (#set text, #show link, +// #import "pkg", #let name). 
keywordBody is exclusive: ':' for show-rule bodies +// and '=' for let-binding values (a keyword expression never has both). +KeywordExpr { CodeKeyword callOrValue? keywordBody? } +callOrValue { CallExpr | CodeString } +keywordBody { ":" codeExprBody | "=" codeValue } AtomExpr { CodeBool } // CallExpr allows zero suffixes — used at top level (#x) and after keywords @@ -83,7 +85,8 @@ CallExpr { CodeIdent callSuffix* } FuncExpr { CodeIdent callSuffix+ } callSuffix { CodeArgs | - "." CodeIdent + "." CodeIdent | + ContentBlock } CodeArgs { "(" codeArgList? ")" } @@ -235,7 +238,10 @@ Escape { "\\" EscapeChar } // by MarkupContent (redundant since '_' is in MarkupContent's exclusion // set, but kept for clarity). // CodeIdent and StrongText/EmphText are now external tokens — not listed. - @precedence { CodeBool EscapeChar "(" "." "]" "_" spaces MarkupContent } + // "[" > MarkupContent: ContentBlock callSuffix wins in merged code/markup states. + // CodeString > MarkupContent: '"' starts a string literal after a keyword. + // ":" > MarkupContent: keywordBody ':' wins over markup colon in code states. + @precedence { CodeBool EscapeChar CodeString "[" ":" "(" "." "]" "_" spaces MarkupContent } } @skip { spaces }