From ad6c78da3ab5c034f38f73410816e2c2fea384cf Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:26:28 -0600 Subject: [PATCH 1/2] ci(bench): gate release benchmark on engine parity thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Benchmark workflow already runs after every release (on workflow_run completion of Publish). Add a parity gate to the build-benchmark job so drift between the native and wasm engines fails the workflow — catching regressions the benchmark data would otherwise silently record. The gate runs after the benchmark doc PR is created, so the raw numbers still land in generated/benchmarks/BUILD-BENCHMARKS.md even when parity regresses; only the workflow status goes red to alert maintainers. Thresholds reference the currently-open parity bugs on v3.9.5: - File-set gap |wasm - native| ≤ 2 (#1011) - DB size ratio native/wasm ≤ 1.02 (#1010) - Full-build edges-phase ratio ≤ 1.30 (#1013) - Full-build roles-phase ratio ≤ 1.30 (#1013) - 1-file incremental ratio ≤ 1.50 (#1012) The gate writes a markdown table to $GITHUB_STEP_SUMMARY showing pass/fail per threshold with a direct link to the tracking issue, so reviewers see the regression at a glance without digging through logs. No behavior change on the passing path — when both engines are within thresholds the step exits 0 silently. 
Impact: 2 functions changed, 2 affected --- .github/workflows/benchmark.yml | 7 ++ scripts/benchmark-parity-gate.mjs | 121 ++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 scripts/benchmark-parity-gate.mjs diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 9d7a755b..77f3affb 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -198,6 +198,13 @@ jobs: --body "Automated build benchmark update for **${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})." fi + # Engine-parity gate: runs AFTER the doc PR is created so the PR still + # records raw benchmark data even when parity regresses. The job status + # going red alerts maintainers; the linked issues describe each threshold. + - name: Engine parity gate + if: steps.existing.outputs.skip != 'true' + run: node scripts/benchmark-parity-gate.mjs benchmark-result.json + embedding-benchmark: runs-on: ubuntu-latest # 7 models x 30 min each = 210 min worst-case; symbols are sampled to 1500 so diff --git a/scripts/benchmark-parity-gate.mjs b/scripts/benchmark-parity-gate.mjs new file mode 100644 index 00000000..ad2e5302 --- /dev/null +++ b/scripts/benchmark-parity-gate.mjs @@ -0,0 +1,121 @@ +#!/usr/bin/env node +/** + * Engine parity gate — runs after the release build benchmark. + * + * Reads the merged benchmark-result.json (contains `wasm` and `native` blocks) + * and fails the workflow if the gap between engines breaches a documented + * threshold. A failure here doesn't block the release (benchmark runs *after* + * Publish completes); it surfaces regressions to maintainers via the workflow's + * red status and writes a summary to $GITHUB_STEP_SUMMARY. 
+ * + * Thresholds reference the parity bugs open against v3.9.5: + * - #1010 DB size / excess ast_nodes + * - #1011 Native orchestrator drops files + * - #1012 Native 1-file incremental runs globally + * - #1013 Native full-build edges/roles phases + * + * Each threshold fires only when BOTH engines produced results. If one engine + * failed, we leave the gate passing so the rest of the workflow (doc PR, + * artifact upload) still runs, and a separate "both engines ran" check flags + * the missing engine. + */ +import fs from 'node:fs'; +import path from 'node:path'; + +const resultFile = process.argv[2]; +if (!resultFile) { + console.error('Usage: benchmark-parity-gate.mjs '); + process.exit(2); +} + +const result = JSON.parse(fs.readFileSync(resultFile, 'utf8')); +const { wasm, native, version } = result; + +const summaryFile = process.env.GITHUB_STEP_SUMMARY; +const writeSummary = (text) => { + if (summaryFile) fs.appendFileSync(summaryFile, text); +}; + +function line(s = '') { + console.log(s); + writeSummary(`${s}\n`); +} + +line(`## Engine parity gate — v${version}`); +line(''); + +if (!wasm || !native) { + const missing = [!wasm && 'wasm', !native && 'native'].filter(Boolean).join(', '); + line(`**FAIL:** missing engine result for: ${missing}. 
Benchmark cannot assert parity.`); + process.exit(1); +} + +// ── Thresholds ───────────────────────────────────────────────────────── +// Each entry: +// name — human-readable label +// actual — computed metric +// limit — ceiling; actual must be ≤ limit +// formatter — how to render the value +// tracks — related issue link shown on failure +const checks = [ + { + name: 'File-set gap (|wasm − native|)', + actual: Math.abs(wasm.files - native.files), + limit: 2, + formatter: (v) => String(v), + tracks: '#1011', + }, + { + name: 'DB size ratio (native / wasm)', + actual: native.dbSizeBytes / wasm.dbSizeBytes, + limit: 1.02, + formatter: (v) => v.toFixed(3), + tracks: '#1010', + }, + { + name: 'Full-build edges-phase ratio', + actual: (native.phases?.edgesMs ?? 0) / Math.max(wasm.phases?.edgesMs ?? 1, 1), + limit: 1.3, + formatter: (v) => v.toFixed(2), + tracks: '#1013', + }, + { + name: 'Full-build roles-phase ratio', + actual: (native.phases?.rolesMs ?? 0) / Math.max(wasm.phases?.rolesMs ?? 1, 1), + limit: 1.3, + formatter: (v) => v.toFixed(2), + tracks: '#1013', + }, + { + name: '1-file incremental ratio', + actual: + (native.oneFileRebuildMs ?? 0) / + Math.max(wasm.oneFileRebuildMs ?? 1, 1), + limit: 1.5, + formatter: (v) => v.toFixed(2), + tracks: '#1012', + }, +]; + +line('| Check | Actual | Limit | Status | Tracks |'); +line('|---|---:|---:|---|---|'); + +let failed = 0; +for (const c of checks) { + const ok = c.actual <= c.limit; + if (!ok) failed++; + const status = ok ? 
':white_check_mark: pass' : ':x: **fail**'; + line( + `| ${c.name} | ${c.formatter(c.actual)} | ${c.formatter(c.limit)} | ${status} | ${c.tracks} |`, + ); +} + +line(''); +if (failed > 0) { + line( + `**${failed} parity check(s) failed.** See linked issues for root-cause tracking; the benchmark doc PR (if opened) captures the raw numbers.`, + ); + process.exit(1); +} + +line('All parity checks passed.'); From b78941115443009fc5cc03a098f7fef4f5f62a32 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 24 Apr 2026 15:33:07 -0600 Subject: [PATCH 2/2] fix: guard parity gate against missing/zero engine fields (#1014) - Missing engine now exits 0 (gate passes) to match the documented behavior in the script-level JSDoc, instead of firing a false-alarm red job when only one engine finishes. - Add nullish/zero guards to file-set gap and DB-size ratio checks so undefined or zero fields in a partial result no longer produce NaN or Infinity. - Wrap the result-file read in try/catch to print a clean diagnostic instead of a raw ENOENT stack trace. - Drop the unused path import. --- scripts/benchmark-parity-gate.mjs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/scripts/benchmark-parity-gate.mjs b/scripts/benchmark-parity-gate.mjs index ad2e5302..fa2ff59b 100644 --- a/scripts/benchmark-parity-gate.mjs +++ b/scripts/benchmark-parity-gate.mjs @@ -20,7 +20,6 @@ * the missing engine. 
*/ import fs from 'node:fs'; -import path from 'node:path'; const resultFile = process.argv[2]; if (!resultFile) { @@ -28,7 +27,13 @@ if (!resultFile) { process.exit(2); } -const result = JSON.parse(fs.readFileSync(resultFile, 'utf8')); +let result; +try { + result = JSON.parse(fs.readFileSync(resultFile, 'utf8')); +} catch (err) { + console.error(`Failed to read ${resultFile}: ${err.message}`); + process.exit(2); +} const { wasm, native, version } = result; const summaryFile = process.env.GITHUB_STEP_SUMMARY; @@ -46,8 +51,8 @@ line(''); if (!wasm || !native) { const missing = [!wasm && 'wasm', !native && 'native'].filter(Boolean).join(', '); - line(`**FAIL:** missing engine result for: ${missing}. Benchmark cannot assert parity.`); - process.exit(1); + line(`**SKIP:** missing engine result for: ${missing}. Cannot assert parity — gate passes.`); + process.exit(0); } // ── Thresholds ───────────────────────────────────────────────────────── @@ -60,14 +65,14 @@ if (!wasm || !native) { const checks = [ { name: 'File-set gap (|wasm − native|)', - actual: Math.abs(wasm.files - native.files), + actual: Math.abs((wasm.files ?? 0) - (native.files ?? 0)), limit: 2, formatter: (v) => String(v), tracks: '#1011', }, { name: 'DB size ratio (native / wasm)', - actual: native.dbSizeBytes / wasm.dbSizeBytes, + actual: (native.dbSizeBytes ?? 0) / Math.max(wasm.dbSizeBytes ?? 1, 1), limit: 1.02, formatter: (v) => v.toFixed(3), tracks: '#1010',