From 2f3e3e7363d670c1d7c4ab8e9cafe8f27f4aabe2 Mon Sep 17 00:00:00 2001 From: claude Date: Sun, 7 Jun 2026 21:14:18 +0000 Subject: [PATCH] fix(typst): make HeadingTitle an external token to end LALR conflicts Any item shared between headingTitleItem and document-level item causes a shift/reduce conflict: the LALR automaton merges the two contexts and makes the shared token ambiguous. The only structural fix is to make HeadingTitle a terminal (external tokenizer) that reads greedily to EOL, giving the LR state machine a context-isolated token that can never collide with document-level item tokens. Removes headingTitleItem sub-rule, HeadingText token, and updates styleTags to match HeadingTitle directly. Co-Authored-By: Claude Sonnet 4.6 --- .../source-editor/languages/typst/index.ts | 4 +-- .../source-editor/lezer-typst/tokens.mjs | 18 +++++++++++++ .../source-editor/lezer-typst/typst.grammar | 25 +++++++++---------- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/services/web/frontend/js/features/source-editor/languages/typst/index.ts b/services/web/frontend/js/features/source-editor/languages/typst/index.ts index 731b949546..29c6d3fba0 100644 --- a/services/web/frontend/js/features/source-editor/languages/typst/index.ts +++ b/services/web/frontend/js/features/source-editor/languages/typst/index.ts @@ -30,8 +30,8 @@ export const TypstLanguage = LRLanguage.define({ CodeArgs: foldInside, }), styleTags({ - // Headings - 'HeadingMark HeadingText': t.heading, + // Headings (HeadingTitle is an external token, one terminal per line) + 'HeadingMark HeadingTitle': t.heading, // Comments 'LineComment LineCommentContent': t.comment, diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs index 6c8da5fc49..0ded687c9a 100644 --- a/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs +++ b/services/web/frontend/js/features/source-editor/lezer-typst/tokens.mjs @@ -3,6 +3,7 @@ import { ExternalTokenizer } from '@lezer/lr' import { HeadingMark, + HeadingTitle, RawBlockOpen, RawBlockBody, RawBlockClose, @@ -47,6 +48,23 @@ export const headingTokenizer = new ExternalTokenizer( { contextual: false } ) +// ── headingTitleTokenizer ──────────────────────────────────────────────── +// Emits HeadingTitle — everything from the current position to end-of-line. +// Using an external token (terminal) instead of grammar sub-items avoids +// LALR(1) conflicts: any token shared between headingTitleItem and document- +// level item causes the automaton to merge the two contexts. +export const headingTitleTokenizer = new ExternalTokenizer( + (input, _stack) => { + let hasContent = false + while (input.next !== -1 && input.next !== NEWLINE) { + input.advance() + hasContent = true + } + if (hasContent) input.acceptToken(HeadingTitle) + }, + { contextual: false } +) + // ── rawTokenizer ──────────────────────────────────────────────────────── // Handles all three raw-block tokens (contextual: uses stack.canShift). // diff --git a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar index ad485b1704..68c9db2ec6 100644 --- a/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar +++ b/services/web/frontend/js/features/source-editor/lezer-typst/typst.grammar @@ -30,17 +30,12 @@ item { // ── Headings ────────────────────────────────────────────────────────────── // HeadingMark is produced by an external tokenizer that enforces the // start-of-line constraint and captures the "=+" prefix + trailing space. -Heading { HeadingMark HeadingTitle } -// Strong and Emphasis are intentionally excluded from headingTitleItem. -// Including them causes an LALR(1) conflict: since Strong/Emphasis also appear -// in document-level `item`, the LR automaton merges heading-title states with -// document-item states, making "*" ambiguous (Strong opener vs. end of heading). -// Instead, HeadingText is widened to consume "*" and "_" as plain text inside -// headings — they are not interpreted as markup delimiters there. -HeadingTitle { headingTitleItem+ } -headingTitleItem { - CodeExpr | InlineMath | RawInline | Label | Ref | HeadingText -} +Heading { HeadingMark HeadingTitle? } +// HeadingTitle is an external token (terminal) that reads everything to EOL. +// Using an external token is the only reliable fix: any token that can start +// both a headingTitleItem and a document-level item causes an LALR(1) conflict +// after headingTitleItem+ because the automaton merges the two contexts. +// External tokens are context-isolated by the LR state machine — no merging. // ── Comments ────────────────────────────────────────────────────────────── LineComment { "//" LineCommentContent } @@ -131,6 +126,10 @@ Escape { "\\" EscapeChar } HeadingMark } +@external tokens headingTitleTokenizer from "./tokens.mjs" { + HeadingTitle +} + @external tokens rawTokenizer from "./tokens.mjs" { RawBlockOpen, RawBlockBody, @@ -188,8 +187,8 @@ Escape { "\\" EscapeChar } // Math content — everything between the $ delimiters (no crossing newlines). MathContent { ![$\n]+ } - // Text tokens for different markup contexts; each excludes its own delimiters. - HeadingText { ![\n$#`<@\\]+ } + // Text tokens for markup contexts; each excludes its own delimiters. + // HeadingText is gone: HeadingTitle is now an external token (see above). StrongText { ![\n*$#`@\\]+ } EmphText { ![\n_$#`@\\]+ }