fix(typst): make HeadingTitle an external token to end LALR conflicts
Build and Deploy Verso / deploy (push) Has been cancelled

Any item shared between headingTitleItem and the document-level `item` rule
causes a shift/reduce conflict: the LALR automaton merges the two contexts and
makes the shared token ambiguous. The only structural fix is to make
HeadingTitle a terminal (external tokenizer) that reads greedily to EOL,
giving the LR state machine a context-isolated token that can never
collide with document-level item tokens.

Removes the headingTitleItem sub-rule and the HeadingText token, and updates
styleTags to match HeadingTitle directly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
claude
2026-06-07 21:14:18 +00:00
parent 94e8ff3503
commit 2f3e3e7363
3 changed files with 32 additions and 15 deletions
@@ -30,8 +30,8 @@ export const TypstLanguage = LRLanguage.define({
CodeArgs: foldInside,
}),
styleTags({
// Headings
'HeadingMark HeadingText': t.heading,
// Headings (HeadingTitle is an external token, one terminal per line)
'HeadingMark HeadingTitle': t.heading,
// Comments
'LineComment LineCommentContent': t.comment,
@@ -3,6 +3,7 @@
import { ExternalTokenizer } from '@lezer/lr'
import {
HeadingMark,
HeadingTitle,
RawBlockOpen,
RawBlockBody,
RawBlockClose,
@@ -47,6 +48,23 @@ export const headingTokenizer = new ExternalTokenizer(
{ contextual: false }
)
// ── headingTitleTokenizer ────────────────────────────────────────────────
// Produces a single HeadingTitle token spanning from the current input
// position up to, but not including, the next NEWLINE (or the end of input,
// signalled by input.next === -1). Characters are consumed without being
// inspected, so the title is one opaque terminal.
//
// Why a terminal rather than grammar sub-items: any token shared between a
// heading-title item rule and the document-level item rule leads to LALR(1)
// conflicts, because the automaton merges the two contexts.
//
// If nothing precedes the line end, no token is accepted; the grammar's
// `HeadingTitle?` allows a heading without a title.
export const headingTitleTokenizer = new ExternalTokenizer(
  (input, _stack) => {
    let consumed = 0
    for (;;) {
      const ch = input.next
      // Stop at end of input or at the line break; the break itself is left
      // in the stream for whatever token follows.
      if (ch === -1 || ch === NEWLINE) break
      input.advance()
      consumed += 1
    }
    // Only a non-empty run qualifies as a title.
    if (consumed > 0) input.acceptToken(HeadingTitle)
  },
  { contextual: false }
)
// ── rawTokenizer ────────────────────────────────────────────────────────
// Handles all three raw-block tokens (contextual: uses stack.canShift).
//
@@ -30,17 +30,12 @@ item {
// ── Headings ──────────────────────────────────────────────────────────────
// HeadingMark is produced by an external tokenizer that enforces the
// start-of-line constraint and captures the "=+" prefix + trailing space.
Heading { HeadingMark HeadingTitle }
// Strong and Emphasis are intentionally excluded from headingTitleItem.
// Including them causes an LALR(1) conflict: since Strong/Emphasis also appear
// in document-level `item`, the LR automaton merges heading-title states with
// document-item states, making "*" ambiguous (Strong opener vs. end of heading).
// Instead, HeadingText is widened to consume "*" and "_" as plain text inside
// headings — they are not interpreted as markup delimiters there.
HeadingTitle { headingTitleItem+ }
headingTitleItem {
CodeExpr | InlineMath | RawInline | Label | Ref | HeadingText
}
Heading { HeadingMark HeadingTitle? }
// HeadingTitle is an external token (terminal) that reads everything to EOL.
// Using an external token is the only reliable fix: any token that can start
// both a headingTitleItem and a document-level item causes an LALR(1) conflict
// after headingTitleItem+ because the automaton merges the two contexts.
// External tokens are context-isolated by the LR state machine — no merging.
// ── Comments ──────────────────────────────────────────────────────────────
LineComment { "//" LineCommentContent }
@@ -131,6 +126,10 @@ Escape { "\\" EscapeChar }
HeadingMark
}
@external tokens headingTitleTokenizer from "./tokens.mjs" {
HeadingTitle
}
@external tokens rawTokenizer from "./tokens.mjs" {
RawBlockOpen,
RawBlockBody,
@@ -188,8 +187,8 @@ Escape { "\\" EscapeChar }
// Math content — everything between the $ delimiters (no crossing newlines).
MathContent { ![$\n]+ }
// Text tokens for different markup contexts; each excludes its own delimiters.
HeadingText { ![\n$#`<@\\]+ }
// Text tokens for markup contexts; each excludes its own delimiters.
// HeadingText is gone: HeadingTitle is now an external token (see above).
StrongText { ![\n*$#`@\\]+ }
EmphText { ![\n_$#`@\\]+ }