diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs index 0ded687c9a..5161ad47e8 100644 --- a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs +++ b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs @@ -10,6 +10,8 @@ import { RawInlineContent, CodeBlockBody, BlockCommentBody, + LineCommentContent, + MathContent, } from './typst.terms.mjs' const BACKTICK = 96 // ` @@ -19,6 +21,7 @@ const NEWLINE = 10 // \n const EQUALS = 61 // = const SPACE = 32 // const TAB = 9 // \t +const DOLLAR = 36 // $ const OPEN_BRACE = 123 // { const CLOSE_BRACE = 125 // } @@ -191,3 +194,36 @@ export const blockCommentTokenizer = new ExternalTokenizer( }, { contextual: false } ) + +// ── lineCommentContentTokenizer ───────────────────────────────────────── +// Emits LineCommentContent — everything from the current position to EOL. +// External rather than a @tokens rule because ![\n]+ conflicts with every +// non-newline literal token in LALR-merged states. External tokenizers are +// context-isolated: only called when the LR state expects this token. +export const lineCommentContentTokenizer = new ExternalTokenizer( + (input, _stack) => { + let hasContent = false + while (input.next !== -1 && input.next !== NEWLINE) { + input.advance() + hasContent = true + } + if (hasContent) input.acceptToken(LineCommentContent) + }, + { contextual: false } +) + +// ── mathContentTokenizer ──────────────────────────────────────────────── +// Emits MathContent — everything between the $...$ delimiters (no newlines). +// External rather than a @tokens rule for the same reason as LineCommentContent: +// ![$\n]+ overlaps with spaces, '<', '@', and other literals in merged states. +export const mathContentTokenizer = new ExternalTokenizer( + (input, _stack) => { + let hasContent = false + while (input.next !== -1 && input.next !== DOLLAR && input.next !== NEWLINE) { + input.advance() + hasContent = true + } + if (hasContent) input.acceptToken(MathContent) + }, + { contextual: false } +) diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar index ccfec7335b..7404ac9b00 100644 --- a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar +++ b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar @@ -148,6 +148,14 @@ Escape { "\\" EscapeChar } BlockCommentBody } +@external tokens lineCommentContentTokenizer from "./tokens.mjs" { + LineCommentContent +} + +@external tokens mathContentTokenizer from "./tokens.mjs" { + MathContent +} + // ── Regular tokens ──────────────────────────────────────────────────────── @tokens { // Horizontal whitespace only. Newlines are kept as explicit Newline items @@ -181,16 +189,14 @@ Escape { "\\" EscapeChar } ("pt" | "mm" | "cm" | "in" | "em" | "rem" | "fr" | "deg" | "rad" | "%")? } - // Comment content — everything to end of line. - LineCommentContent { ![\n]+ } - - // Math content — everything between the $ delimiters (no crossing newlines). - MathContent { ![$\n]+ } - // Text tokens for markup contexts; each excludes its own delimiters. - // HeadingText is gone: HeadingTitle is now an external token (see above). - StrongText { ![\n*$#`@\\]+ } - EmphText { ![\n_$#`@\\]+ } + // HeadingText, LineCommentContent, and MathContent are external tokens + // (see above) — broad "read-to-delimiter" tokens that would otherwise + // conflict with every other literal token in LALR-merged states. + // '<' is excluded from StrongText/EmphText so that Label ('<' LabelName '>') + // is recognised inside strong/emphasis rather than consumed as plain text. + StrongText { ![\n*$#`<@\\]+ } + EmphText { ![\n_$#`<@\\]+ } // Regular markup: excludes all special-character starters plus whitespace // (whitespace is handled by @skip). The '/' is excluded so that '//' and @@ -210,11 +216,10 @@ Escape { "\\" EscapeChar } // Resolve ambiguities: more-specific tokens win over broader catch-alls. // EscapeChar > spaces: after '\', EscapeChar must win over the skip token // (both match \t; without this, '\t' would be mis-tokenized). - // "(" > "." > text tokens: after '#' CodeIdent, callSuffix delimiters must win - // over MarkupContent/StrongText/EmphText in the LALR-merged state. - // "]" > LineCommentContent: inside #[...], ']' closes the ContentBlock even - // if it appears after '//' (comment does not "protect" the bracket). - @precedence { CodeKeyword CodeBool CodeIdent EscapeChar "(" "." "]" spaces MarkupContent StrongText EmphText LineCommentContent } + // "(" > "." > "]" > text tokens: after '#' CodeIdent, callSuffix delimiters + // must win over MarkupContent/StrongText/EmphText in merged states. + // LineCommentContent and MathContent are external tokens — not listed here. + @precedence { CodeKeyword CodeBool CodeIdent EscapeChar "(" "." "]" spaces MarkupContent StrongText EmphText } } @skip { spaces }