fix(typst): make HeadingTitle an external token to end LALR conflicts
Build and Deploy Verso / deploy (push) Has been cancelled
Build and Deploy Verso / deploy (push) Has been cancelled
Any item shared between headingTitleItem and the document-level item rule causes a shift/reduce conflict: the LALR automaton merges the two contexts, which makes the shared token ambiguous. The only structural fix is to make HeadingTitle a terminal, produced by an external tokenizer that reads greedily to EOL, giving the LR state machine a context-isolated token that can never collide with document-level item tokens. This commit removes the headingTitleItem sub-rule and the HeadingText token, and updates styleTags to match HeadingTitle directly. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -30,8 +30,8 @@ export const TypstLanguage = LRLanguage.define({
|
||||
CodeArgs: foldInside,
|
||||
}),
|
||||
styleTags({
|
||||
// Headings
|
||||
'HeadingMark HeadingText': t.heading,
|
||||
// Headings (HeadingTitle is an external token, one terminal per line)
|
||||
'HeadingMark HeadingTitle': t.heading,
|
||||
|
||||
// Comments
|
||||
'LineComment LineCommentContent': t.comment,
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import { ExternalTokenizer } from '@lezer/lr'
|
||||
import {
|
||||
HeadingMark,
|
||||
HeadingTitle,
|
||||
RawBlockOpen,
|
||||
RawBlockBody,
|
||||
RawBlockClose,
|
||||
@@ -47,6 +48,23 @@ export const headingTokenizer = new ExternalTokenizer(
|
||||
{ contextual: false }
|
||||
)
|
||||
|
||||
// ── headingTitleTokenizer ────────────────────────────────────────────────
|
||||
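// Emits HeadingTitle — everything from the current position up to (but not
// including) the next newline, or to end of input. Emits nothing when there
// is nothing to read before the newline / end of input.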
|
||||
// Using an external token (terminal) instead of grammar sub-items avoids
|
||||
// LALR(1) conflicts: any token shared between headingTitleItem and document-
|
||||
// level item causes the automaton to merge the two contexts.
|
||||
export const headingTitleTokenizer = new ExternalTokenizer(
  (input, _stack) => {
    // Already at a newline or at end of input (-1): there is no title text,
    // so no token is accepted.
    if (input.next === -1 || input.next === NEWLINE) return

    // Consume greedily up to, but not including, the newline or end of input.
    // Everything on the rest of the line ends up inside the one HeadingTitle
    // token.
    do {
      input.advance()
    } while (input.next !== -1 && input.next !== NEWLINE)

    input.acceptToken(HeadingTitle)
  },
  { contextual: false }
)
|
||||
|
||||
// ── rawTokenizer ────────────────────────────────────────────────────────
|
||||
// Handles all three raw-block tokens (contextual: uses stack.canShift).
|
||||
//
|
||||
|
||||
@@ -30,17 +30,12 @@ item {
|
||||
// ── Headings ──────────────────────────────────────────────────────────────
|
||||
// HeadingMark is produced by an external tokenizer that enforces the
|
||||
// start-of-line constraint and captures the "=+" prefix + trailing space.
|
||||
Heading { HeadingMark HeadingTitle }
|
||||
// Strong and Emphasis are intentionally excluded from headingTitleItem.
|
||||
// Including them causes an LALR(1) conflict: since Strong/Emphasis also appear
|
||||
// in document-level `item`, the LR automaton merges heading-title states with
|
||||
// document-item states, making "*" ambiguous (Strong opener vs. end of heading).
|
||||
// Instead, HeadingText is widened to consume "*" and "_" as plain text inside
|
||||
// headings — they are not interpreted as markup delimiters there.
|
||||
HeadingTitle { headingTitleItem+ }
|
||||
headingTitleItem {
|
||||
CodeExpr | InlineMath | RawInline | Label | Ref | HeadingText
|
||||
}
|
||||
Heading { HeadingMark HeadingTitle? }
|
||||
// HeadingTitle is an external token (terminal) that reads everything to EOL.
|
||||
// Using an external token is the only reliable fix: any token that can start
|
||||
// both a headingTitleItem and a document-level item causes an LALR(1) conflict
|
||||
// after headingTitleItem+ because the automaton merges the two contexts.
|
||||
// External tokens are context-isolated by the LR state machine — no merging.
|
||||
|
||||
// ── Comments ──────────────────────────────────────────────────────────────
|
||||
LineComment { "//" LineCommentContent }
|
||||
@@ -131,6 +126,10 @@ Escape { "\\" EscapeChar }
|
||||
HeadingMark
|
||||
}
|
||||
|
||||
@external tokens headingTitleTokenizer from "./tokens.mjs" {
|
||||
HeadingTitle
|
||||
}
|
||||
|
||||
@external tokens rawTokenizer from "./tokens.mjs" {
|
||||
RawBlockOpen,
|
||||
RawBlockBody,
|
||||
@@ -188,8 +187,8 @@ Escape { "\\" EscapeChar }
|
||||
// Math content — everything between the $ delimiters (no crossing newlines).
|
||||
MathContent { ![$\n]+ }
|
||||
|
||||
// Text tokens for different markup contexts; each excludes its own delimiters.
|
||||
HeadingText { ![\n$#`<@\\]+ }
|
||||
// Text tokens for markup contexts; each excludes its own delimiters.
|
||||
// HeadingText is gone: HeadingTitle is now an external token (see above).
|
||||
StrongText { ![\n*$#`@\\]+ }
|
||||
EmphText { ![\n_$#`@\\]+ }
|
||||
|
||||
|
||||
Reference in New Issue
Block a user