From 8faf55f0e7785a0c0e7aafd2c359e3e3602fb4bf Mon Sep 17 00:00:00 2001 From: Gordon Woodhull Date: Mon, 23 Feb 2026 14:10:17 -0500 Subject: [PATCH 1/2] claude: clean up test artifacts Co-Authored-By: Claude Opus 4.6 --- .../docs/smoke-all/2023/11/17/foo/.gitignore | 2 + .../2023/12/04/7784/subdir/index.typ | 240 ------------------ .../2024/02/01/issue-8514/.gitignore | 2 + .../docs/smoke-all/2024/02/22/8814/.gitignore | 3 +- .../docs/smoke-all/2024/02/22/8843/.gitignore | 3 +- 5 files changed, 8 insertions(+), 242 deletions(-) delete mode 100644 tests/docs/smoke-all/2023/12/04/7784/subdir/index.typ diff --git a/tests/docs/smoke-all/2023/11/17/foo/.gitignore b/tests/docs/smoke-all/2023/11/17/foo/.gitignore index 075b2542afb..0e3521a7d0f 100644 --- a/tests/docs/smoke-all/2023/11/17/foo/.gitignore +++ b/tests/docs/smoke-all/2023/11/17/foo/.gitignore @@ -1 +1,3 @@ /.quarto/ + +**/*.quarto_ipynb diff --git a/tests/docs/smoke-all/2023/12/04/7784/subdir/index.typ b/tests/docs/smoke-all/2023/12/04/7784/subdir/index.typ deleted file mode 100644 index 21eed240f20..00000000000 --- a/tests/docs/smoke-all/2023/12/04/7784/subdir/index.typ +++ /dev/null @@ -1,240 +0,0 @@ -// Some definitions presupposed by pandoc's typst output. -#let blockquote(body) = [ - #set text( size: 0.92em ) - #block(inset: (left: 1.5em, top: 0.2em, bottom: 0.2em))[#body] -] - -#let horizontalrule = [ - #line(start: (25%,0%), end: (75%,0%)) -] - -#let endnote(num, contents) = [ - #stack(dir: ltr, spacing: 3pt, super[#num], contents) -] - -#show terms: it => { - it.children - .map(child => [ - #strong[#child.term] - #block(inset: (left: 1.5em, top: -0.4em))[#child.description] - ]) - .join() -} - -// Some quarto-specific definitions. - -#show raw.where(block: true): block.with( - fill: luma(230), - width: 100%, - inset: 8pt, - radius: 2pt - ) - -#let block_with_new_content(old_block, new_content) = { - let d = (:) - let fields = old_block.fields() - fields.remove("body") - if fields.at("below", default: none) != none { - // TODO: this is a hack because below is a "synthesized element" - // according to the experts in the typst discord... - fields.below = fields.below.amount - } - return block.with(..fields)(new_content) -} - -#let empty(v) = { - if type(v) == "string" { - // two dollar signs here because we're technically inside - // a Pandoc template :grimace: - v.matches(regex("^\\s*$")).at(0, default: none) != none - } else if type(v) == "content" { - if v.at("text", default: none) != none { - return empty(v.text) - } - for child in v.at("children", default: ()) { - if not empty(child) { - return false - } - } - return true - } - -} - -#show figure: it => { - let kind_match = it.kind.matches(regex("^quarto-callout-(.*)")).at(0, default: none) - if kind_match != none { - let kind = kind_match.captures.at(0, default: "other") - kind = upper(kind.first()) + kind.slice(1) - // now we pull apart the callout and reassemble it with the crossref name and counter - - // when we cleanup pandoc's emitted code to avoid spaces this will have to change - let old_callout = it.body.children.at(1).body.children.at(1) - let old_title_block = old_callout.body.children.at(0) - let old_title = old_title_block.body.body.children.at(2) - - // TODO use custom separator if available - let new_title = if empty(old_title) { - [#kind #it.counter.display()] - } else { - [#kind #it.counter.display(): #old_title] - } - - let new_title_block = block_with_new_content( - old_title_block, - block_with_new_content( - old_title_block.body, - old_title_block.body.body.children.at(0) + - old_title_block.body.body.children.at(1) + - new_title)) - - block_with_new_content(old_callout, - new_title_block + - old_callout.body.children.at(1)) - } else { - it - } -} - -#show ref: it => locate(loc => { - let target = query(it.target, loc).first() - if it.at("supplement", default: none) == none { - it - return - } - - let sup = it.supplement.text.matches(regex("^45127368-afa1-446a-820f-fc64c546b2c5%(.*)")).at(0, default: none) - if sup != none { - let parent_id = sup.captures.first() - let parent_figure = query(label(parent_id), loc).first() - let parent_location = parent_figure.location() - - let counters = numbering( - parent_figure.at("numbering"), - ..parent_figure.at("counter").at(parent_location)) - - let subcounter = numbering( - target.at("numbering"), - ..target.at("counter").at(target.location())) - - // NOTE there's a nonbreaking space in the block below - link(target.location(), [#parent_figure.at("supplement") #counters#subcounter]) - } else { - it - } -}) - -// 2023-10-09: #fa-icon("fa-info") is not working, so we'll eval "#fa-info()" instead -#let callout(body: [], title: "Callout", background_color: rgb("#dddddd"), icon: none, icon_color: black) = { - block( - breakable: false, - fill: background_color, - stroke: (paint: icon_color, thickness: 0.5pt, cap: "round"), - width: 100%, - radius: 2pt, - block( - inset: 1pt, - width: 100%, - below: 0pt, - block( - fill: background_color, - width: 100%, - inset: 8pt)[#text(icon_color, weight: 900)[#icon] #title]) + - block( - inset: 1pt, - width: 100%, - block(fill: white, width: 100%, inset: 8pt, body))) -} - - - -#let article( - title: none, - authors: none, - date: none, - abstract: none, - cols: 1, - margin: (x: 1.25in, y: 1.25in), - paper: "us-letter", - lang: "en", - region: "US", - font: (), - fontsize: 11pt, - sectionnumbering: none, - toc: false, - doc, -) = { - set page( - paper: paper, - margin: margin, - numbering: "1", - ) - set par(justify: true) - set text(lang: lang, - region: region, - font: font, - size: fontsize) - set heading(numbering: sectionnumbering) - - if title != none { - align(center)[#block(inset: 2em)[ - #text(weight: "bold", size: 1.5em)[#title] - ]] - } - - if authors != none { - let count = authors.len() - let ncols = calc.min(count, 3) - grid( - columns: (1fr,) * ncols, - row-gutter: 1.5em, - ..authors.map(author => - align(center)[ - #author.name \ - #author.affiliation \ - #author.email - ] - ) - ) - } - - if date != none { - align(center)[#block(inset: 1em)[ - #date - ]] - } - - if abstract != none { - block(inset: 2em)[ - #text(weight: "semibold")[Abstract] #h(1em) #abstract - ] - } - - if toc { - block(above: 0em, below: 2em)[ - #outline( - title: auto, - depth: none - ); - ] - } - - if cols == 1 { - doc - } else { - columns(cols, doc) - } -} -#show: doc => article( - title: [subdir index], - cols: 1, - doc, -) - - - - - - -#bibliography("../refs.bib") - diff --git a/tests/docs/smoke-all/2024/02/01/issue-8514/.gitignore b/tests/docs/smoke-all/2024/02/01/issue-8514/.gitignore index 075b2542afb..0e3521a7d0f 100644 --- a/tests/docs/smoke-all/2024/02/01/issue-8514/.gitignore +++ b/tests/docs/smoke-all/2024/02/01/issue-8514/.gitignore @@ -1 +1,3 @@ /.quarto/ + +**/*.quarto_ipynb diff --git a/tests/docs/smoke-all/2024/02/22/8814/.gitignore b/tests/docs/smoke-all/2024/02/22/8814/.gitignore index 2a6b7e85562..1805b039507 100644 --- a/tests/docs/smoke-all/2024/02/22/8814/.gitignore +++ b/tests/docs/smoke-all/2024/02/22/8814/.gitignore @@ -1,2 +1,3 @@ /.quarto/ -/_book/ \ No newline at end of file +/_book/ +**/*.quarto_ipynb diff --git a/tests/docs/smoke-all/2024/02/22/8843/.gitignore b/tests/docs/smoke-all/2024/02/22/8843/.gitignore index 2a6b7e85562..1805b039507 100644 --- a/tests/docs/smoke-all/2024/02/22/8843/.gitignore +++ b/tests/docs/smoke-all/2024/02/22/8843/.gitignore @@ -1,2 +1,3 @@ /.quarto/ -/_book/ \ No newline at end of file +/_book/ +**/*.quarto_ipynb From 912e1065922bcdb22e6636fb4aa5147804595b1d Mon Sep 17 00:00:00 2001 From: Gordon Woodhull Date: Mon, 23 Feb 2026 14:10:40 -0500 Subject: [PATCH 2/2] claude: add QUARTO_PDF_STANDARD env var and pdf analysis tools Add environment variable fallback for pdf-standard option so any document without an explicit pdf-standard setting inherits from QUARTO_PDF_STANDARD (comma-separated, e.g. "ua-1" or "a-2b,ua-1"). Also add tools/find-tests.ts to find test documents by format and tools/filter-pdf-errors.ts to extract and summarize PDF validation errors from render logs. Co-Authored-By: Claude Opus 4.6 --- src/command/render/output-tex.ts | 4 +- src/command/render/output-typst.ts | 7 +- src/config/constants.ts | 9 ++ src/format/pdf/format-pdf.ts | 4 +- tools/filter-pdf-errors.ts | 233 +++++++++++++++++++++++++++++ tools/find-tests.ts | 80 ++++++++++ 6 files changed, 333 insertions(+), 4 deletions(-) create mode 100644 tools/filter-pdf-errors.ts create mode 100644 tools/find-tests.ts diff --git a/src/command/render/output-tex.ts b/src/command/render/output-tex.ts index 6a0d10deb0d..3527ff5cccc 100644 --- a/src/command/render/output-tex.ts +++ b/src/command/render/output-tex.ts @@ -18,6 +18,7 @@ import { kPdfStandard, kPdfStandardApplied, kTargetFormat, + pdfStandardEnv, } from "../../config/constants.ts"; import { Format } from "../../config/types.ts"; import { asArray } from "../../core/array.ts"; @@ -90,7 +91,8 @@ export function texToPdfOutputRecipe( const pdfStandards = asArray( pandocOptions.format.metadata?.[kPdfStandardApplied] ?? format.render?.[kPdfStandard] ?? - format.metadata?.[kPdfStandard], + format.metadata?.[kPdfStandard] ?? + pdfStandardEnv(), ) as string[]; if (pdfStandards.length > 0) { await validatePdfStandards(pdfOutput, pdfStandards, { diff --git a/src/command/render/output-typst.ts b/src/command/render/output-typst.ts index 6c970405bad..7064980ec52 100644 --- a/src/command/render/output-typst.ts +++ b/src/command/render/output-typst.ts @@ -34,6 +34,7 @@ import { kOutputFile, kPdfStandard, kVariant, + pdfStandardEnv, } from "../../config/constants.ts"; import { error, warning } from "../../deno_ral/log.ts"; import { ErrorEx } from "../../core/lib/error.ts"; @@ -158,7 +159,8 @@ export function typstPdfOutputRecipe( ), pdfStandard: normalizePdfStandardForTypst( asArray( - format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard], + format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard] ?? + pdfStandardEnv(), ), ), }; @@ -185,7 +187,8 @@ export function typstPdfOutputRecipe( // Validate PDF against specified standards using verapdf (if available) const pdfStandards = asArray( - format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard], + format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard] ?? + pdfStandardEnv(), ) as string[]; if (pdfStandards.length > 0) { await validatePdfStandards(pdfOutput, pdfStandards, { diff --git a/src/config/constants.ts b/src/config/constants.ts index ebb65064084..749fb8a4e62 100644 --- a/src/config/constants.ts +++ b/src/config/constants.ts @@ -88,6 +88,15 @@ export const kKeepTex = "keep-tex"; export const kKeepTyp = "keep-typ"; export const kPdfStandard = "pdf-standard"; export const kPdfStandardApplied = "pdf-standard-applied"; + +/** Read QUARTO_PDF_STANDARD env var as a fallback for pdf-standard option. */ +export function pdfStandardEnv(): string[] | undefined { + const val = Deno.env.get("QUARTO_PDF_STANDARD"); + if (val) { + return val.split(",").map((s) => s.trim()).filter((s) => s.length > 0); + } + return undefined; +} export const kKeepIpynb = "keep-ipynb"; export const kKeepSource = "keep-source"; export const kVariant = "variant"; diff --git a/src/format/pdf/format-pdf.ts b/src/format/pdf/format-pdf.ts index b38ada6b204..b37099f70c4 100644 --- a/src/format/pdf/format-pdf.ts +++ b/src/format/pdf/format-pdf.ts @@ -38,6 +38,7 @@ import { kTblCapLoc, kTopLevelDivision, kWarning, + pdfStandardEnv, } from "../../config/constants.ts"; import { warning } from "../../deno_ral/log.ts"; import { asArray } from "../../core/array.ts"; @@ -326,7 +327,8 @@ function createPdfFormat( // Handle pdf-standard option for PDF/A, PDF/UA, PDF/X conformance const pdfStandard = asArray( - format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard], + format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard] ?? + pdfStandardEnv(), ); if (pdfStandard.length > 0) { const { version, standards, needsTagging } = diff --git a/tools/filter-pdf-errors.ts b/tools/filter-pdf-errors.ts new file mode 100644 index 00000000000..eb84cae19d4 --- /dev/null +++ b/tools/filter-pdf-errors.ts @@ -0,0 +1,233 @@ +#!/usr/bin/env -S quarto run +/** + * filter-pdf-errors.ts + * + * Parse a quarto render log and extract PDF validation errors, + * showing which files failed and why. Aggregates errors by type at the end. + * + * Handles two error formats: + * - Typst compiler errors: "error: PDF/UA-1 error: missing alt text" + * - verapdf validation failures: "WARN: PDF validation failed for ua-2:\n" + * + * Usage: + * quarto run tools/filter-pdf-errors.ts + * + * Reads from stdin if no file is given. + */ + +// Strip ANSI escape codes +function stripAnsi(s: string): string { + return s.replace(/\x1b\[[0-9;]*m/g, ""); +} + +// Unescape HTML entities from verapdf output +function unescapeHtml(s: string): string { + return s + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/&/g, "&") + .replace(/"/g, '"') + .replace(/'/g, "'"); +} + +interface RenderBlock { + inputFile: string; + outputFile: string; + lines: string[]; +} + +interface ErrorEntry { + file: string; + errorType: string; + context: string[]; +} + +function extractErrors(block: RenderBlock): { + seenErrors: Set; + context: string[]; +} { + const context: string[] = []; + let inStack = false; + const seenErrors = new Set(); + + for (let i = 0; i < block.lines.length; i++) { + const line = block.lines[i]; + + // Skip download lines + if (/^Download /.test(line.trim())) continue; + // Skip blank-ish lines at start + if (context.length === 0 && line.trim() === "") continue; + + // Detect start of stack trace + if (/^Stack trace:/.test(line.trim())) { + inStack = true; + continue; + } + if (inStack) { + if (/^\s+at /.test(line)) continue; + inStack = false; + } + + // Skip duplicate ERROR: lines from typst + if (/^ERROR: error: PDF\//.test(line.trim())) continue; + if (/^ERROR: Typst compilation failed/.test(line.trim())) continue; + + // Typst compiler errors: "error: PDF/UA-1 error: missing alt text" + const typstMatch = line.match(/error: (PDF\/\S+ error: .+)/); + if (typstMatch) { + const errType = typstMatch[1].trim(); + seenErrors.add(errType); + } + + // verapdf failures: "WARN: PDF validation failed for :" + // followed by one or more rule description lines until a blank line or "Output created" + const verapdfMatch = line.match( + /^WARN: PDF validation failed for ([\w-]+):$/, + ); + if (verapdfMatch) { + const standard = verapdfMatch[1]; + // Collect the rule lines that follow + for (let j = i + 1; j < block.lines.length; j++) { + const ruleLine = block.lines[j].trim(); + if (ruleLine === "" || /^Output created/.test(ruleLine)) break; + const errType = `${standard}: ${ruleLine}`; + seenErrors.add(errType); + } + } + + context.push(line); + } + + // Trim trailing blank lines + while (context.length > 0 && context[context.length - 1].trim() === "") { + context.pop(); + } + + return { seenErrors, context }; +} + +async function main() { + const path = Deno.args[0]; + let text: string; + if (path) { + text = await Deno.readTextFile(path); + } else { + const buf = await new Response(Deno.stdin.readable).text(); + text = buf; + } + + const rawLines = text.split("\n"); + const lines = rawLines.map((l) => unescapeHtml(stripAnsi(l))); + + // Parse into render blocks. Each block starts with either a + // "Rendering .qmd" line or a "pandoc" header. + const blocks: RenderBlock[] = []; + let current: RenderBlock | null = null; + let pendingInputFile = ""; + + for (const line of lines) { + // "Rendering docs/smoke-all/.../foo.qmd" precedes the pandoc block + const renderMatch = line.match(/^Rendering\s+(\S+\.qmd)\s*$/); + if (renderMatch) { + pendingInputFile = renderMatch[1]; + continue; + } + + if (/^pandoc\s*$/.test(line.trim())) { + if (current) blocks.push(current); + current = { inputFile: pendingInputFile, outputFile: "", lines: [] }; + pendingInputFile = ""; + current.lines.push(line); + continue; + } + if (current) { + current.lines.push(line); + const m = line.match(/^\s*output-file:\s*(.+)/); + if (m) { + current.outputFile = m[1].trim(); + } + } + } + if (current) blocks.push(current); + + // Extract errors from each block + const errors: ErrorEntry[] = []; + const errorCounts = new Map(); + const errorFiles = new Map(); + + for (const block of blocks) { + // Check for either error format + const hasError = block.lines.some( + (l) => + l.includes("error: PDF/") || + l.includes("ERROR: error: PDF/") || + l.includes("PDF validation failed"), + ); + if (!hasError) continue; + + const { seenErrors, context } = extractErrors(block); + const displayFile = block.inputFile || block.outputFile; + + for (const errType of seenErrors) { + errorCounts.set(errType, (errorCounts.get(errType) || 0) + 1); + const files = errorFiles.get(errType) || []; + if (!files.includes(displayFile)) { + files.push(displayFile); + } + errorFiles.set(errType, files); + + errors.push({ + file: displayFile, + errorType: errType, + context, + }); + } + } + + // Print per-file errors + const printedFiles = new Set(); + for (const err of errors) { + if (printedFiles.has(err.file)) continue; + printedFiles.add(err.file); + + const fileErrors = errors.filter((e) => e.file === err.file); + const types = [...new Set(fileErrors.map((e) => e.errorType))]; + + console.log("─".repeat(72)); + console.log(`FILE: ${err.file}`); + console.log(`ERRORS: ${types.join(", ")}`); + console.log(""); + for (const line of err.context) { + console.log(" " + line); + } + console.log(""); + } + + // Print summary + console.log("═".repeat(72)); + console.log("SUMMARY"); + console.log("═".repeat(72)); + console.log(""); + console.log(`Total files with errors: ${printedFiles.size}`); + console.log(`Total files rendered: ${blocks.length}`); + console.log(""); + + // Sort by count descending + const sorted = [...errorCounts.entries()].sort((a, b) => b[1] - a[1]); + for (const [errType, count] of sorted) { + console.log(` ${count.toString().padStart(4)} ${errType}`); + } + console.log(""); + + // List files per error type + for (const [errType] of sorted) { + const files = errorFiles.get(errType) || []; + console.log(`${errType} (${files.length} files):`); + for (const f of files) { + console.log(` - ${f}`); + } + console.log(""); + } +} + +main(); diff --git a/tools/find-tests.ts b/tools/find-tests.ts new file mode 100644 index 00000000000..14aea1ff659 --- /dev/null +++ b/tools/find-tests.ts @@ -0,0 +1,80 @@ +/** + * find-tests.ts + * + * Find all .qmd files under a directory that target a given format. + * + * Usage: + * quarto run --dev tools/find-tests.ts + * + * A document matches if the format name appears as: + * - The value of `format:` (string) in its YAML front matter + * - A key under `format:` (object) in its YAML front matter + * - A key under `_quarto.tests:` in its YAML front matter + */ + +import { walk } from "../src/deno_ral/fs.ts"; +import { readYamlFromMarkdown, readYaml } from "../src/core/yaml.ts"; +import { dirname, join, relative } from "../src/deno_ral/path.ts"; +import { existsSync } from "../src/deno_ral/fs.ts"; + +const [format, dir] = Deno.args; +if (!format || !dir) { + console.error("Usage: quarto run tools/find-tests.ts "); + Deno.exit(1); +} + +function hasFormat( + yaml: Record, + format: string, +): boolean { + // Check format: or format: { : ... } + const fmt = yaml["format"]; + if (typeof fmt === "string" && fmt === format) { + return true; + } + if (fmt && typeof fmt === "object" && format in (fmt as Record)) { + return true; + } + + // Check _quarto.tests. + const quarto = yaml["_quarto"] as Record | undefined; + if (quarto) { + const tests = quarto["tests"] as Record | undefined; + if (tests && format in tests) { + return true; + } + } + + return false; +} + +for await (const entry of walk(dir, { exts: [".qmd"], includeDirs: false })) { + try { + const content = Deno.readTextFileSync(entry.path); + const yaml = readYamlFromMarkdown(content) as Record; + if (hasFormat(yaml, format)) { + console.log(relative(Deno.cwd(), entry.path)); + continue; + } + + // Check _quarto.yml in the same directory and ancestors up to dir + let current = dirname(entry.path); + const root = Deno.realPathSync(dir); + while (true) { + const quartoYml = join(current, "_quarto.yml"); + if (existsSync(quartoYml)) { + const projYaml = readYaml(quartoYml) as Record; + if (hasFormat(projYaml, format)) { + console.log(relative(Deno.cwd(), entry.path)); + break; + } + } + if (Deno.realPathSync(current) === root) break; + const parent = dirname(current); + if (parent === current) break; + current = parent; + } + } catch { + // skip files that can't be parsed + } +}