Files
claude 1c323351a2
Build and Deploy Verso / deploy (push) Successful in 9m46s
fix: parse Quarto schema YAML errors and stop heading style bleeding
Two unrelated fixes:

1. quarto-log-parser: handle the two-line Quarto schema-validation
   error format:
     ERROR: In file main.qmd
     (line 6, columns 24--27) Field "section-numbering" has value …
   Previously neither the file name nor the line number were extracted,
   so the error appeared without a red highlight. Now the first line
   stores the filename in pendingLocation and the second line creates
   the log entry with the correct file and line so the editor can jump
   to and highlight it.

2. headingTitleTokenizer: change contextual: false → contextual: true
   and guard with stack.canShift(HeadingTitle). With contextual: false
   Lezer calls the tokenizer speculatively at positions beyond the strict
   post-HeadingMark state; in some LALR-merged states the resulting token
   was accepted for body-text lines, making them render as bold-blue
   heading text. The contextual guard ensures the tokenizer only fires
   in the one state where HeadingTitle is legitimately valid.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 13:31:26 +00:00

240 lines
8.5 KiB
TypeScript

import { LatexLogEntry, ParseResult } from './latex-log-parser'
// Parser for the combined stdout/stderr that `quarto render` writes to
// output.log (see services/clsi/app/js/QuartoRunner.js). Quarto orchestrates
// several tools, each with its own diagnostic style:
//
// - Typst (the engine for .qmd -> PDF): emits
// error: unexpected end of block comment
// ┌─ main.typ:5:10
// ...and the analogous `warning: ...` form. Older builds use `-->` instead
// of the box-drawing arrow.
// - Pandoc (markdown -> typst/html): emits `[WARNING] ...` / `[ERROR] ...`.
// - Quarto CLI itself (YAML validation, project errors, Deno crashes): emits
// `ERROR: ...` / `WARNING: ...` (upper-case) or `error: Uncaught ...`.
// - knitr/R (.Rmd / executable cells): emits `Quitting from lines 3-7 (x.qmd)`
// followed by an `Error: ...` message.
//
// This is deliberately a flat, line-oriented parser rather than the stateful
// LaTeX one: Quarto's output has no nested-file `(...)` structure to track.
// It returns the same ParseResult shape so the rest of the log pipeline
// (HumanReadableLogs consumers, the errors/warnings tabs, editor annotations)
// can treat Quarto entries exactly like LaTeX ones.
// ANSI SGR (colour/style) escape sequences such as `\x1b[31m` or `\x1b[0m`.
// eslint-disable-next-line no-control-regex
const ANSI_REGEX = /\x1b\[[0-9;]*m/g

// Typst / Deno: `error: message`, `warning: message` (lower-case prefix).
// Group 1 is the severity, group 2 the message.
const LOWER_DIAG_REGEX = /^(error|warning): (.*)$/

// Quarto CLI: `ERROR: message`, `WARNING: message` (upper-case prefix).
// Group 1 is the severity, group 2 the message.
const UPPER_DIAG_REGEX = /^(ERROR|WARNING): (.*)$/

// Pandoc: `[WARNING] message`, `[ERROR] message`, `[INFO] message`.
// Group 1 is the severity, group 2 the message.
const PANDOC_REGEX = /^\[(WARNING|ERROR|INFO)\] (.*)$/

// knitr/R chunk location that precedes an R error. Both layouts are accepted:
//   Quitting from lines 3-7 (slides.qmd)
//   Quitting from lines 3-7 [chunk-label] (slides.qmd)
// Group 1 is the first line of the chunk; group 2 is the file name, when one
// is present. The optional `[chunk-label]` is skipped so that the file name is
// still captured for the labelled layout.
const R_QUITTING_REGEX =
  /^Quitting from lines? (\d+)(?:-\d+)?\s*(?:\[[^\]]*\]\s*)?(?:\(([^)]+)\))?/

// Python (Jupyter cell execution): a missing dependency, e.g.
//   ModuleNotFoundError: No module named 'pandas'
//   ImportError: No module named scipy
// Group 1 is the (possibly dotted) module name without the quotes.
const PY_MODULE_REGEX =
  /^(?:ModuleNotFoundError|ImportError): No module named ['"]?([\w.]+)['"]?/

// Import (module) name -> PyPI package name, for the common cases where they
// differ. Anything not listed defaults to the module name itself.
const PY_MODULE_TO_PACKAGE: Record<string, string> = {
  cv2: 'opencv-python',
  sklearn: 'scikit-learn',
  skimage: 'scikit-image',
  PIL: 'Pillow',
  yaml: 'PyYAML',
  bs4: 'beautifulsoup4',
  Crypto: 'pycryptodome',
  OpenSSL: 'pyOpenSSL',
  dateutil: 'python-dateutil',
  dotenv: 'python-dotenv',
  serial: 'pyserial',
  usb: 'pyusb',
  cairo: 'pycairo',
  gi: 'PyGObject',
  win32com: 'pywin32',
}

// A typst diagnostic location line: ` ┌─ main.typ:5:10` / ` --> main.typ:5:10`.
// Groups: 1 = file, 2 = line, 3 = column.
const TYPST_LOCATION_REGEX = /(?:[┌╭]─|-->)\s*(.+?):(\d+):(\d+)/

// Quarto schema-validation location line emitted after `ERROR: In file <f>`:
//   (line 6, columns 24--27) Field "section-numbering" has value …
// Group 1 is the line number, group 2 the message that follows the location.
const QUARTO_SCHEMA_LOC_REGEX = /^\(line (\d+), columns? \d+(?:--\d+)?\)\s+(.*)/

// Extracts the filename (group 1) from an `ERROR: In file <filename>` message.
const QUARTO_IN_FILE_REGEX = /^In file (.+)$/
/**
 * Removes every escape sequence matched by `ANSI_REGEX` from a log line.
 *
 * @param line - A single raw line of log output.
 * @returns The same line with the matched escape sequences deleted.
 */
function stripAnsi(line: string): string {
  const withoutEscapes = line.replace(ANSI_REGEX, '')
  return withoutEscapes
}
/**
 * Tells whether a line opens a new diagnostic, i.e. whether it matches any of
 * the lower-case, upper-case, Pandoc or Quarto schema-location patterns.
 *
 * @param trimmed - A log line with ANSI codes and leading whitespace removed.
 * @returns `true` when at least one of the diagnostic patterns matches.
 */
function isDiagnosticStart(trimmed: string): boolean {
  const starters = [
    LOWER_DIAG_REGEX,
    UPPER_DIAG_REGEX,
    PANDOC_REGEX,
    QUARTO_SCHEMA_LOC_REGEX,
  ]
  return starters.some(pattern => pattern.test(trimmed))
}
// knitr/R error line that follows a `Quitting from lines …` line, e.g.
//   Error: object 'x' not found
//   Error in foo() : could not find function "foo"
//   Error in `foo()`:              (message continues on the next line)
// Group 1 is the message when it sits on the same line, otherwise undefined.
const R_ERROR_REGEX = /^Error(?: in .*?)?\s*:(?:\s+(.*)|$)/

/**
 * Gathers the lines that belong to a diagnostic whose first line has already
 * been read. Collection stops at a blank line, at the start of the next
 * diagnostic, or at the end of the log.
 *
 * @param lines - All log lines (still containing ANSI codes).
 * @param start - Index of the first candidate continuation line.
 * @param firstLine - The ANSI-stripped first line of the diagnostic.
 * @returns The joined content, the index of the first line NOT consumed, and
 *   the first typst `file:line:col` location seen among the continuation
 *   lines (or `null` when there is none).
 */
function collectContinuation(
  lines: string[],
  start: number,
  firstLine: string
): {
  content: string
  nextIndex: number
  typst: { file: string; line: number } | null
} {
  let content = firstLine
  let typst: { file: string; line: number } | null = null
  let j = start
  for (; j < lines.length; j++) {
    const next = stripAnsi(lines[j])
    if (next.trim() === '') break
    if (isDiagnosticStart(next.trimStart())) break
    content += '\n' + next
    if (typst === null) {
      const loc = next.match(TYPST_LOCATION_REGEX)
      if (loc) typst = { file: loc[1], line: parseInt(loc[2], 10) }
    }
  }
  return { content, nextIndex: j, typst }
}

/**
 * Reports whether the next non-blank line at or after `start` is a Quarto
 * schema-validation location line (`(line N, columns A--B) …`).
 */
function schemaLocationFollows(lines: string[], start: number): boolean {
  for (let j = start; j < lines.length; j++) {
    const candidate = stripAnsi(lines[j]).trim()
    if (candidate === '') continue
    return QUARTO_SCHEMA_LOC_REGEX.test(candidate)
  }
  return false
}

/**
 * Parses the combined output of `quarto render` into log entries.
 *
 * The log is scanned line by line. Recognised diagnostics are:
 * - lower-case `error:` / `warning:` lines (with an optional typst location
 *   box among the following lines),
 * - upper-case `ERROR:` / `WARNING:` lines,
 * - Pandoc `[ERROR]` / `[WARNING]` lines (`[INFO]` is skipped),
 * - the two-line Quarto schema error (`ERROR: In file <f>` followed by
 *   `(line N, columns …) message`),
 * - Python missing-module errors, rewritten into an actionable message,
 * - an R `Error…` line, but only while a knitr `Quitting from lines …`
 *   location is pending.
 *
 * @param rawLog - Full log text; `\r\n` and bare `\r` are treated as newlines.
 * @returns The entries passed through `postProcess`.
 */
export default function parseQuartoLog(rawLog: string): ParseResult {
  const lines = rawLog.replace(/\r\n?/g, '\n').split('\n')
  const data: LatexLogEntry[] = []
  // Location announced by an earlier line and consumed by the next entry.
  let pendingLocation: { file?: string; line?: number } = {}

  for (let i = 0; i < lines.length; i++) {
    const clean = stripAnsi(lines[i])
    const trimmed = clean.trimStart()

    // Remember the most recent knitr location; it precedes the R error line.
    const quitting = trimmed.match(R_QUITTING_REGEX)
    if (quitting) {
      pendingLocation = {
        line: parseInt(quitting[1], 10),
        file: quitting[2],
      }
      continue
    }

    // A missing Python package when executing a {python} cell. Turn the raw
    // traceback line into an actionable message rather than letting it slip
    // through as an opaque error (or not be surfaced at all).
    const pyModule = trimmed.match(PY_MODULE_REGEX)
    if (pyModule) {
      const moduleName = pyModule[1]
      // Suggest the PyPI package for the top-level module (cv2 -> opencv-python).
      const topLevel = moduleName.split('.')[0]
      // Own-property check: names such as `constructor` or `__proto__` must not
      // resolve to members inherited from Object.prototype.
      const suggestion = Object.prototype.hasOwnProperty.call(
        PY_MODULE_TO_PACKAGE,
        topLevel
      )
        ? PY_MODULE_TO_PACKAGE[topLevel]
        : topLevel
      data.push({
        line: pendingLocation.line ?? null,
        file: pendingLocation.file,
        level: 'error',
        message: `Python module "${moduleName}" is not available`,
        content:
          `${clean}\n\n` +
          `If "${topLevel}" is a PyPI package, add \`${suggestion}\` to your ` +
          `Verso requirements file (requirements.vrf) and recompile as the ` +
          `project owner or a collaborator. If it is your own module, add its ` +
          `.py file(s) to the project instead.\n` +
          `Pre-installed: numpy, pandas, scipy, matplotlib, seaborn, ` +
          `scikit-learn, sympy, plotly, tabulate, opencv-python (cv2), tqdm.`,
        raw: clean,
      })
      pendingLocation = {}
      continue
    }

    // Second line of a Quarto schema-validation error:
    //   ERROR: In file main.qmd              <- stored in pendingLocation.file
    //   (line 6, columns 24--27) Field …     <- this line: line number + message
    const schemaLoc = trimmed.match(QUARTO_SCHEMA_LOC_REGEX)
    if (schemaLoc) {
      const tail = collectContinuation(lines, i + 1, clean)
      i = tail.nextIndex - 1
      data.push({
        line: parseInt(schemaLoc[1], 10),
        file: pendingLocation.file,
        level: 'error',
        message: schemaLoc[2].trim(),
        content: tail.content,
        raw: tail.content,
      })
      pendingLocation = {}
      continue
    }

    let level: LatexLogEntry['level'] | null = null
    let message: string | null = null
    // File named by an `In file <f>` line that is reported as its own entry.
    let namedFile: string | undefined
    let m: RegExpMatchArray | null
    if ((m = trimmed.match(LOWER_DIAG_REGEX))) {
      level = m[1] === 'error' ? 'error' : 'warning'
      message = m[2]
    } else if ((m = trimmed.match(UPPER_DIAG_REGEX))) {
      // `ERROR: In file <filename>` is normally the preamble of a schema
      // error. Defer it only when the `(line N, …)` line really follows;
      // otherwise report it as an ordinary diagnostic so that it is neither
      // lost nor allowed to leak its file name onto an unrelated entry.
      const inFile = m[2].trim().match(QUARTO_IN_FILE_REGEX)
      if (inFile) {
        namedFile = inFile[1].trim()
        if (schemaLocationFollows(lines, i + 1)) {
          pendingLocation = { ...pendingLocation, file: namedFile }
          continue
        }
      }
      level = m[1] === 'ERROR' ? 'error' : 'warning'
      message = m[2]
    } else if ((m = trimmed.match(PANDOC_REGEX))) {
      if (m[1] === 'INFO') continue // pandoc INFO lines are not actionable
      level = m[1] === 'ERROR' ? 'error' : 'warning'
      message = m[2]
    } else if (
      pendingLocation.line !== undefined &&
      (m = trimmed.match(R_ERROR_REGEX))
    ) {
      // Only a knitr `Quitting from lines …` line sets pendingLocation.line,
      // so this branch is limited to the R error that follows it.
      level = 'error'
      // `||` on purpose: fall back to the whole line when the message group is
      // missing or empty (the text then continues on the following lines).
      message = m[1] || trimmed
    }
    if (level === null || message === null) continue

    // Accumulate any following indented/diagnostic lines (the typst box, a Deno
    // stack trace, R traceback) as the entry's content, and pick up a
    // file:line:col location from the typst box if present.
    const tail = collectContinuation(lines, i + 1, clean)
    i = tail.nextIndex - 1

    let file = pendingLocation.file
    let line: number | null = pendingLocation.line ?? null
    if (!file && tail.typst) {
      file = tail.typst.file
      line = tail.typst.line
    }
    if (!file) file = namedFile

    data.push({
      line,
      file,
      level,
      message: message.trim(),
      content: tail.content,
      raw: tail.content,
    })
    pendingLocation = {}
  }
  return postProcess(data)
}
/**
 * Builds the final parse result from the collected entries.
 *
 * Entries whose `raw` text has already been seen are dropped (first occurrence
 * wins). Each kept entry is appended to `all` and, when its level is `error`,
 * `warning` or `typesetting`, to the list for that level. `files` is always
 * returned empty.
 *
 * @param data - Entries in the order they were found in the log.
 * @returns The de-duplicated entries grouped by level.
 */
function postProcess(data: LatexLogEntry[]): ParseResult {
  const errors: LatexLogEntry[] = []
  const warnings: LatexLogEntry[] = []
  const typesetting: LatexLogEntry[] = []
  const all: LatexLogEntry[] = []
  const seenRaw = new Set<string>()

  data.forEach(entry => {
    if (seenRaw.has(entry.raw)) return
    seenRaw.add(entry.raw)
    switch (entry.level) {
      case 'error':
        errors.push(entry)
        break
      case 'warning':
        warnings.push(entry)
        break
      case 'typesetting':
        typesetting.push(entry)
        break
      default:
        // Any other level is kept in `all` only.
        break
    }
    all.push(entry)
  })

  return { errors, warnings, typesetting, all, files: [] }
}