diff --git a/AGENTS.md b/AGENTS.md index afbf1d7..515d724 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -748,7 +748,9 @@ npm run push -- assistants # Push only assistants npm run push -- resources//assistants/my-agent.md # Push single file npm run push -- # Push multiple specific files (one state write) npm run push -- --dry-run # Preview without applying any platform changes +npm run push -- --strict # Abort push if any validator returns an error npm run apply -- # Pull then push (full sync) +npm run validate -- # Lint resources locally (fails fast on schema drift) # Testing npm run call -- -a # Call an assistant via WebSocket diff --git a/improvements.md b/improvements.md index 33bacaa..97cb23f 100644 --- a/improvements.md +++ b/improvements.md @@ -59,19 +59,19 @@ you which stack PR closes the row.** | 5 | `push --dry-run` | Cheapest operator-safety win | None | RESOLVED 2026-04-30 (Stack C) | | 6 | API-level optimistic concurrency | Server-side conflict rejection | Platform | Deferred (Stack I, gated) | | 7 | Voice edits drop pronunciation-dictionary attachments | Silent regression on Cartesia + 11labs voice edits | #4 | Open (Stack G planned) | -| 8 | Dashboard prompt edits can in-place duplicate the prompt | Two stacked prompt versions = stitched output | None | Open (Stack D planned) | -| 9 | Provider-specific voice schema mismatch (push 400) | `voice.speed` vs `voice.generationConfig.speed` | None | Partial — doc cheat-sheet (Stack A) | +| 8 | Dashboard prompt edits can in-place duplicate the prompt | Two stacked prompt versions = stitched output | None | Partial — Stack D heuristic | +| 9 | Provider-specific voice schema mismatch (push 400) | `voice.speed` vs `voice.generationConfig.speed` | None | RESOLVED 2026-04-30 (Stack D + A) | | 10 | Targeted assistant push mints duplicate tools | Re-pushing assistant duplicates `end-call-*` tools | #4 | Partial | -| 11 | Bidirectional SO ↔ assistant lockstep has no validation | One-sided edits silently inconsistent | 
None | Open (Stack D planned) | +| 11 | Bidirectional SO ↔ assistant lockstep has no validation | One-sided edits silently inconsistent | None | RESOLVED 2026-04-30 (Stack D) | | 12 | State file accumulates UUIDs without source files | Silent gitops drift | None | Partial | | 13 | `.agent/` and `.claude/handoffs/` not gitignored | `git add -A` sweeps PII handoff scratch | None | RESOLVED 2026-04-30 (Stack A) | | 14 | Multi-file push undocumented | Discoverability | None | RESOLVED 2026-04-30 (Stack A) | | 15 | Scoped push rewrites entire state file | Pre-existing drift sweeps into focused commits | #4 | Open (Stack J planned) | | 16 | No CLI runner for simulation suites | Engine pushes them, can't run them | None | Open (Stack E planned) | | 17 | State file key-order churn produces noisy diffs | Reorderings hide real changes | None | RESOLVED 2026-04-30 (Stack B) | -| 18 | Structured-output `name` capped at 40 chars (no warning) | Push fails partway after partial application | None | Open (Stack D planned) | -| 19 | No `maxTokens` floor warning for tool-using assistants | `maxTokens: 1` bricks the assistant silently | None | Open (Stack D planned) | -| 20 | Prompt vocabulary leaks into TTS | `Reason.` becomes verbal contaminant | None | Open (Stack D heuristic planned) | +| 18 | Structured-output `name` capped at 40 chars (no warning) | Push fails partway after partial application | None | RESOLVED 2026-04-30 (Stack D) | +| 19 | No `maxTokens` floor warning for tool-using assistants | `maxTokens: 1` bricks the assistant silently | None | RESOLVED 2026-04-30 (Stack D) | +| 20 | Prompt vocabulary leaks into TTS | `Reason.` becomes verbal contaminant | None | Partial — Stack D heuristic | --- diff --git a/package.json b/package.json index 5822b59..28c69f7 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,7 @@ "call": "bash -c 'exec tsx src/call-cmd.ts \"$@\" 2> >(grep --line-buffered -v \"buffer underflow\" >&2)' --", "cleanup": "tsx src/cleanup-cmd.ts", 
"eval": "tsx src/eval.ts", + "validate": "tsx src/validate-cmd.ts", "build": "tsc --noEmit", "test": "node --import tsx --test tests/*.test.ts" }, diff --git a/src/config.ts b/src/config.ts index 6510b53..6288c4a 100644 --- a/src/config.ts +++ b/src/config.ts @@ -86,6 +86,7 @@ function parseFlags(): { forceDelete: boolean; bootstrapSync: boolean; dryRun: boolean; + strictValidation: boolean; applyFilter: ApplyFilter; } { const args = process.argv.slice(3); @@ -93,11 +94,13 @@ function parseFlags(): { forceDelete: boolean; bootstrapSync: boolean; dryRun: boolean; + strictValidation: boolean; applyFilter: ApplyFilter; } = { forceDelete: args.includes("--force"), bootstrapSync: args.includes("--bootstrap"), dryRun: args.includes("--dry-run"), + strictValidation: args.includes("--strict"), applyFilter: {}, }; @@ -108,7 +111,12 @@ function parseFlags(): { const arg = args[i]; if (!arg) continue; - if (arg === "--force" || arg === "--bootstrap" || arg === "--dry-run") + if ( + arg === "--force" || + arg === "--bootstrap" || + arg === "--dry-run" || + arg === "--strict" + ) continue; // --confirm : consumed by cleanup.ts directly. 
Eat the value here so @@ -243,6 +251,7 @@ export const { forceDelete: FORCE_DELETE, bootstrapSync: BOOTSTRAP_SYNC, dryRun: DRY_RUN, + strictValidation: STRICT_VALIDATION, applyFilter: APPLY_FILTER, } = parseFlags(); diff --git a/src/push.ts b/src/push.ts index 078421b..d051664 100644 --- a/src/push.ts +++ b/src/push.ts @@ -6,10 +6,12 @@ import { VAPI_BASE_URL, FORCE_DELETE, DRY_RUN, + STRICT_VALIDATION, APPLY_FILTER, BASE_DIR, removeExcludedKeys, } from "./config.ts"; +import { summarizeFindings, validateResources } from "./validate.ts"; import { loadState, saveState } from "./state.ts"; import { loadResources, loadSingleResource, FOLDER_MAP } from "./resources.ts"; import { fetchAllResources, resourceIdMatchesName, runPull } from "./pull.ts"; @@ -909,6 +911,30 @@ async function main(): Promise { state = await maybeBootstrapState(loadedResources, state); + // Run client-side validators against the loaded resource set. In default + // mode, errors are surfaced as warnings so a single bad spec doesn't block + // an otherwise-good push. With --strict, any error-severity finding aborts + // before any API call. + console.log("\n🔎 Running validators..."); + const findings = validateResources(loadedResources); + if (findings.length > 0) { + console.log(summarizeFindings(findings)); + } else { + console.log(" ✅ No validation issues."); + } + const errorCount = findings.filter((f) => f.severity === "error").length; + if (errorCount > 0) { + if (STRICT_VALIDATION) { + console.error( + `\n❌ Validation failed (${errorCount} error(s)). --strict refuses to push. 
Fix the issues above or drop --strict.`,
+      );
+      process.exit(1);
+    }
+    console.warn(
+      ` ⚠️ ${errorCount} validation error(s) detected — push will continue (use --strict to abort on errors).`,
+    );
+  }
+
   // Resolve credential names → UUIDs in all resource data before applying
   const credMap = credentialForwardMap(state);
   if (credMap.size > 0) {
diff --git a/src/validate-cmd.ts b/src/validate-cmd.ts
new file mode 100644
index 0000000..e4f9f08
--- /dev/null
+++ b/src/validate-cmd.ts
@@ -0,0 +1,67 @@
+// CLI entry: `npm run validate -- `
+//
+// Loads the same resource shape as `push.ts` would (so the validator runs
+// against exactly what would ship), then runs all client-side validators
+// and prints findings. Exit code 0 if no errors, 1 if any error-severity
+// finding is present.
+
+import { resolve } from "path";
+import { fileURLToPath } from "url";
+import { VAPI_ENV, VAPI_BASE_URL } from "./config.ts";
+import { loadResources } from "./resources.ts";
+import { summarizeFindings, validateResources } from "./validate.ts";
+import type { LoadedResources } from "./types.ts";
+
+// Loads every resource type, runs the validators, prints the summary, and
+// exits with status 1 when any error-severity finding is present.
+async function main(): Promise<void> {
+  console.log(
+    "═══════════════════════════════════════════════════════════════",
+  );
+  console.log(`🔎 Vapi GitOps Validate - Environment: ${VAPI_ENV}`);
+  console.log(` API: ${VAPI_BASE_URL}`);
+  console.log(
+    "═══════════════════════════════════════════════════════════════\n",
+  );
+
+  console.log("📂 Loading resources...\n");
+  const resources: LoadedResources = {
+    tools: await loadResources("tools"),
+    structuredOutputs: await loadResources("structuredOutputs"),
+    assistants: await loadResources("assistants"),
+    squads: await loadResources("squads"),
+    personalities: await loadResources("personalities"),
+    scenarios: await loadResources("scenarios"),
+    simulations: await loadResources("simulations"),
+    simulationSuites: await loadResources("simulationSuites"),
+    evals: await loadResources("evals"),
+  };
+
+  const findings = validateResources(resources);
+  console.log(`\n${summarizeFindings(findings)}\n`);
+
+  const errorCount = findings.filter((f) => f.severity === "error").length;
+  if (errorCount > 0) {
+    console.error(
+      `❌ Validation failed with ${errorCount} error(s). Fix the issues above before pushing.`,
+    );
+    process.exit(1);
+  }
+  console.log("✅ Validation passed.");
+}
+
+// True only when this file is the script Node was started with, so importing
+// the module does not trigger a validation run.
+const isMainModule =
+  process.argv[1] !== undefined &&
+  resolve(process.argv[1]) === fileURLToPath(import.meta.url);
+
+if (isMainModule) {
+  main().catch((error) => {
+    console.error(
+      "\n❌ Validation failed:",
+      error instanceof Error ? error.message : error,
+    );
+    process.exit(1);
+  });
+}
diff --git a/src/validate.ts b/src/validate.ts
new file mode 100644
index 0000000..fd17841
--- /dev/null
+++ b/src/validate.ts
@@ -0,0 +1,465 @@
+// ─────────────────────────────────────────────────────────────────────────────
+// Client-side validators — fail-fast schema / lockstep / shape checks
+//
+// Designed to catch the classes of errors that would otherwise only surface
+// when the Vapi API returns a 400 mid-push. By default the push pipeline only
+// reports findings; with `--strict`, any error-severity finding aborts the
+// push before resources are applied (warn-severity findings never block).
+//
+// Sources for each check are documented in `improvements.md`:
+//   - Name length cap (40 chars)                  → improvements #18
+//   - SO ↔ assistant lockstep                     → improvements #11
+//   - Prompt duplication heuristics               → improvements #8, #20
+//   - maxTokens floor for tool-using assistants   → improvements #19
+//   - Per-provider voice schema                   → improvements #9
+// ─────────────────────────────────────────────────────────────────────────────
+
+import type { LoadedResources, ResourceFile, ResourceType } from "./types.ts";
+
+// Severity attached to every finding.
+export type ValidationSeverity = "warn" | "error";
+
+// One problem reported by a validator.
+export interface ValidationFinding {
+  severity: ValidationSeverity;
+  // Resource collection the offending resource belongs to.
+  type: ResourceType;
+  resourceId: string;
+  // Stable rule identifier, e.g. "name-length".
+  rule: string;
+  message: string;
+  // Path of the offending field inside the resource data, when known.
+  fieldPath?: string;
+}
+
+const NAME_MAX_LEN = 40;
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Check 1: Name length cap (40 chars)
+// ─────────────────────────────────────────────────────────────────────────────
+
+// Returns a `name-length` error when `name` is a string longer than
+// NAME_MAX_LEN; returns null otherwise (missing or non-string names are not
+// this rule's concern). `label` is the text that precedes the quoted name in
+// the message.
+function nameLengthFinding(
+  type: ResourceType,
+  resourceId: string,
+  name: unknown,
+  label: string,
+  fieldPath: string,
+): ValidationFinding | null {
+  if (typeof name !== "string" || name.length <= NAME_MAX_LEN) return null;
+  return {
+    severity: "error",
+    type,
+    resourceId,
+    rule: "name-length",
+    message: `${label} "${name}" is ${name.length} chars; Vapi caps at ${NAME_MAX_LEN}`,
+    fieldPath,
+  };
+}
+
+// Flags over-long names on assistants, on standalone structured outputs, and
+// on structured outputs declared inline under a scenario's `evaluations[]`.
+function checkNameLengths(
+  resources: LoadedResources,
+): ValidationFinding[] {
+  const findings: ValidationFinding[] = [];
+  const add = (finding: ValidationFinding | null): void => {
+    if (finding) findings.push(finding);
+  };
+
+  for (const assistant of resources.assistants) {
+    add(
+      nameLengthFinding(
+        "assistants",
+        assistant.resourceId,
+        (assistant.data as { name?: unknown }).name,
+        "assistant name",
+        "name",
+      ),
+    );
+  }
+
+  // Standalone structured outputs are the resource improvements #18 is about;
+  // previously only the inline scenario form below was checked.
+  for (const so of resources.structuredOutputs) {
+    add(
+      nameLengthFinding(
+        "structuredOutputs",
+        so.resourceId,
+        (so.data as { name?: unknown }).name,
+        "structured output name",
+        "name",
+      ),
+    );
+  }
+
+  for (const scenario of resources.scenarios) {
+    const evals = (scenario.data as { evaluations?: unknown }).evaluations;
+    if (!Array.isArray(evals)) continue;
+    evals.forEach((evalEntry, idx) => {
+      // Optional chaining: a null/undefined entry is skipped instead of
+      // throwing a TypeError and taking the whole validator down.
+      const so = (
+        evalEntry as { structuredOutput?: unknown } | null | undefined
+      )?.structuredOutput;
+      if (!so || typeof so !== "object") return;
+      const path = `evaluations[${idx}].structuredOutput.name`;
+      add(
+        nameLengthFinding(
+          "scenarios",
+          scenario.resourceId,
+          (so as { name?: unknown }).name,
+          path,
+          path,
+        ),
+      );
+    });
+  }
+
+  return findings;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Check 2: SO ↔ assistant bidirectional lockstep
+//
+// Each edge is declared on both sides:
+//   structured-output.assistant_ids[]               = [assistantA, assistantB]
+//   assistantA.artifactPlan.structuredOutputIds[]   = [SO]
+//
+// A one-sided declaration is a silent inconsistency.
+// ─────────────────────────────────────────────────────────────────────────────
+
+// String entries of `assistant.data.artifactPlan.structuredOutputIds`, or []
+// when the plan or the list is absent or malformed.
+function getAssistantStructuredOutputIds(
+  assistant: ResourceFile,
+): string[] {
+  const ap = (assistant.data as { artifactPlan?: unknown }).artifactPlan;
+  if (!ap || typeof ap !== "object") return [];
+  const ids = (ap as { structuredOutputIds?: unknown }).structuredOutputIds;
+  return Array.isArray(ids)
+    ? ids.filter((s): s is string => typeof s === "string")
+    : [];
+}
+
+// String entries of `so.data.assistant_ids`, or [] when the list is absent or
+// not an array.
+function getStructuredOutputAssistantIds(
+  so: ResourceFile,
+): string[] {
+  const ids = (so.data as { assistant_ids?: unknown }).assistant_ids;
+  return Array.isArray(ids)
+    ? ids.filter((s): s is string => typeof s === "string")
+    : [];
+}
+
+// Warns about every SO ↔ assistant edge that is declared on one side only.
+// References to resources that are not loaded are skipped (a different class
+// of problem). Each one-sided edge is reported once, even when the id is
+// repeated within a list.
+function checkLockstep(resources: LoadedResources): ValidationFinding[] {
+  const findings: ValidationFinding[] = [];
+
+  // Index the edges each side declares, keyed by resourceId, so the
+  // counterpart lookup below is a Set membership test.
+  const soIdsByAssistant = new Map<string, Set<string>>();
+  for (const assistant of resources.assistants) {
+    soIdsByAssistant.set(
+      assistant.resourceId,
+      new Set(getAssistantStructuredOutputIds(assistant)),
+    );
+  }
+  const assistantIdsBySo = new Map<string, Set<string>>();
+  for (const so of resources.structuredOutputs) {
+    assistantIdsBySo.set(
+      so.resourceId,
+      new Set(getStructuredOutputAssistantIds(so)),
+    );
+  }
+
+  // Forward: SO declares assistantA — assistantA must list SO.
+  for (const so of resources.structuredOutputs) {
+    for (const aid of new Set(getStructuredOutputAssistantIds(so))) {
+      const declaredSOs = soIdsByAssistant.get(aid);
+      if (!declaredSOs) continue; // missing-reference is a different class
+      if (declaredSOs.has(so.resourceId)) continue;
+      findings.push({
+        severity: "warn",
+        type: "structuredOutputs",
+        resourceId: so.resourceId,
+        rule: "so-assistant-lockstep",
+        message:
+          `structured output "${so.resourceId}" lists assistant "${aid}" in assistant_ids ` +
+          `but assistant "${aid}" does NOT list this SO in artifactPlan.structuredOutputIds`,
+        fieldPath: "assistant_ids",
+      });
+    }
+  }
+
+  // Reverse: assistant declares SO — SO must list assistant.
+  for (const assistant of resources.assistants) {
+    for (const sid of new Set(getAssistantStructuredOutputIds(assistant))) {
+      const declaredAssistants = assistantIdsBySo.get(sid);
+      if (!declaredAssistants) continue;
+      if (declaredAssistants.has(assistant.resourceId)) continue;
+      findings.push({
+        severity: "warn",
+        type: "assistants",
+        resourceId: assistant.resourceId,
+        rule: "so-assistant-lockstep",
+        message:
+          `assistant "${assistant.resourceId}" lists SO "${sid}" in artifactPlan.structuredOutputIds ` +
+          `but SO "${sid}" does NOT list this assistant in assistant_ids`,
+        fieldPath: "artifactPlan.structuredOutputIds",
+      });
+    }
+  }
+
+  return findings;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Check 3: Prompt duplication heuristics
+//
+// Catches the dashboard "paste-on-top" footgun (improvements #8). Only
+// partially covers improvements #20 (prompt vocabulary leaking into TTS):
+// tool-argument names are not inspected by any check in this file.
+// ─────────────────────────────────────────────────────────────────────────────
+
+// Returns `assistant.data.model` when it is a non-null object, else null.
+function getModelBlock(
+  assistant: ResourceFile,
+): Record<string, unknown> | null {
+  const model = (assistant.data as { model?: unknown }).model;
+  return model && typeof model === "object"
+    ? (model as Record<string, unknown>)
+    : null;
+}
+
+// Content of the first `model.messages[]` entry whose role is "system" and
+// whose content is a string; null when there is no such message.
+function getSystemPrompt(assistant: ResourceFile): string | null {
+  const messages = getModelBlock(assistant)?.messages;
+  if (!Array.isArray(messages)) return null;
+  const sys = messages.find(
+    (m): m is { role: string; content: string } =>
+      typeof m === "object" &&
+      m !== null &&
+      (m as { role?: unknown }).role === "system" &&
+      typeof (m as { content?: unknown }).content === "string",
+  );
+  return sys?.content ?? null;
+}
+
+// Phrases that should appear at most once in a system prompt.
+const RISKY_HEADINGS = [
+  "CONTINUITY ON ENTRY",
+  "CLOSEOUT FLOW STRUCTURE",
+];
+
+// Warns when an assistant's system prompt repeats an H1 heading (at most one
+// warning per assistant) or contains any RISKY_HEADINGS phrase more than once.
+function checkPromptDuplications(
+  resources: LoadedResources,
+): ValidationFinding[] {
+  const findings: ValidationFinding[] = [];
+
+  for (const assistant of resources.assistants) {
+    const prompt = getSystemPrompt(assistant);
+    if (!prompt) continue;
+
+    // Same H1 heading appearing twice. Headings are compared after trimming
+    // so trailing whitespace on one copy does not hide the duplicate.
+    const seenH1 = new Set<string>();
+    for (const rawH1 of prompt.match(/^# .+$/gm) ?? []) {
+      const h1 = rawH1.trim();
+      if (seenH1.has(h1)) {
+        findings.push({
+          severity: "warn",
+          type: "assistants",
+          resourceId: assistant.resourceId,
+          rule: "prompt-duplicate-h1",
+          message:
+            `system prompt contains duplicate H1 heading "${h1}" — ` +
+            `likely a paste-on-top duplication from the dashboard prompt editor`,
+        });
+        break; // one warning per assistant is enough
+      }
+      seenH1.add(h1);
+    }
+
+    // Risky keywords appearing more than once. Splitting on the literal
+    // phrase counts non-overlapping occurrences without building a regex.
+    for (const heading of RISKY_HEADINGS) {
+      const occurrences = prompt.split(heading).length - 1;
+      if (occurrences > 1) {
+        findings.push({
+          severity: "warn",
+          type: "assistants",
+          resourceId: assistant.resourceId,
+          rule: "prompt-duplicate-block",
+          message:
+            `system prompt has ${occurrences} occurrences of "${heading}" — ` +
+            `block likely duplicated`,
+        });
+      }
+    }
+  }
+
+  return findings;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Check 4: maxTokens floor for tool-using assistants
+//
+// A `maxTokens` smaller than the tool-call payload means the model can't
+// emit a complete tool call — push succeeds, the assistant bricks at
+// runtime. Compute a soft floor and warn under it.
+//
+// NOTE: the floor is built from the character length of each attached tool's
+// serialized parameter schema, not from a token count, so it is only a rough
+// stand-in for what a tool call actually needs.
+// ─────────────────────────────────────────────────────────────────────────────
+
+// Fixed part of the floor, added to the summed schema sizes.
+const MAX_TOKENS_BASE_FLOOR = 25;
+
+// `model.maxTokens` when it is a number, else undefined.
+function getAssistantMaxTokens(assistant: ResourceFile): number | undefined {
+  const mt = getModelBlock(assistant)?.maxTokens;
+  return typeof mt === "number" ? mt : undefined;
+}
+
+// String entries of `model.toolIds`, or [] when absent or not an array.
+function getAssistantToolIds(assistant: ResourceFile): string[] {
+  const ids = getModelBlock(assistant)?.toolIds;
+  return Array.isArray(ids)
+    ? ids.filter((s): s is string => typeof s === "string")
+    : [];
+}
+
+// Character length of `tool.data.function.parameters` serialized as JSON.
+// Returns 0 when the tool has no function block, when parameters is missing
+// or null, or when the value cannot be serialized.
+function getToolParametersSize(tool: ResourceFile): number {
+  const fn = (tool.data as { function?: unknown }).function;
+  if (!fn || typeof fn !== "object") return 0;
+  const params = (fn as { parameters?: unknown }).parameters;
+  // `== null` also covers an explicit null, which would otherwise be counted
+  // as the four characters of "null".
+  if (params == null) return 0;
+  try {
+    // JSON.stringify yields undefined for values it cannot represent.
+    return JSON.stringify(params)?.length ?? 0;
+  } catch {
+    // e.g. circular structure or BigInt — treat as "no measurable schema".
+    return 0;
+  }
+}
+
+// Warns when an assistant that references tools sets `model.maxTokens` below
+// MAX_TOKENS_BASE_FLOOR plus the summed parameter-schema sizes of the tools
+// that are actually loaded. Assistants without maxTokens, without toolIds, or
+// whose tools contribute no schema are skipped.
+function checkMaxTokensFloor(
+  resources: LoadedResources,
+): ValidationFinding[] {
+  const findings: ValidationFinding[] = [];
+  const toolById = new Map(resources.tools.map((t) => [t.resourceId, t]));
+
+  for (const assistant of resources.assistants) {
+    const maxTokens = getAssistantMaxTokens(assistant);
+    if (maxTokens === undefined) continue;
+
+    const toolIds = getAssistantToolIds(assistant);
+    if (toolIds.length === 0) continue;
+
+    let argsBudget = 0;
+    for (const tid of toolIds) {
+      const tool = toolById.get(tid);
+      if (tool) argsBudget += getToolParametersSize(tool);
+    }
+    if (argsBudget === 0) continue;
+
+    const floor = MAX_TOKENS_BASE_FLOOR + argsBudget;
+    if (maxTokens < floor) {
+      findings.push({
+        severity: "warn",
+        type: "assistants",
+        resourceId: assistant.resourceId,
+        rule: "max-tokens-floor",
+        message:
+          `model.maxTokens=${maxTokens} may truncate tool-call args; ` +
+          `recommended floor for attached tools is ${floor} ` +
+          `(${MAX_TOKENS_BASE_FLOOR} + sum of tool parameter schema sizes)`,
+        fieldPath: "model.maxTokens",
+      });
+    }
+  }
+
+  return findings;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Check 5: Per-provider voice schema
+//
+// See docs/learnings/voice-providers.md for the full layout.
+// ─────────────────────────────────────────────────────────────────────────────
+
+// Top-level voice keys reported as errors when provider is "cartesia".
+const CARTESIA_FORBIDDEN_TOP_LEVEL = new Set([
+  "speed",
+  "stability",
+  "similarityBoost",
+  "enableSsmlParsing",
+]);
+// Top-level voice keys reported as errors when provider is "11labs".
+const ELEVENLABS_FORBIDDEN_TOP_LEVEL = new Set(["generationConfig"]);
+
+// Remediation hint appended to the Cartesia message for a forbidden key.
+function cartesiaRemedy(key: string): string {
+  if (key === "speed") {
+    return "use voice.generationConfig.speed (0.6–1.5) instead";
+  }
+  if (key === "enableSsmlParsing") {
+    return "Cartesia parses SSML natively; remove the field";
+  }
+  return `field is 11labs-only; remove for Cartesia`;
+}
+
+// Validates one voice block against the forbidden-key list of its provider.
+//
+// `voice` may be anything; non-objects and providers other than "cartesia" /
+// "11labs" produce no findings. `fieldPath` is the path of the voice block
+// itself; each finding appends the offending key to it.
+function checkVoiceBlock(
+  type: ResourceType,
+  resourceId: string,
+  voice: unknown,
+  fieldPath: string,
+): ValidationFinding[] {
+  if (!voice || typeof voice !== "object") return [];
+  const v = voice as Record<string, unknown>;
+  const keys = Object.keys(v);
+
+  switch (v.provider) {
+    case "cartesia":
+      return keys
+        .filter((key) => CARTESIA_FORBIDDEN_TOP_LEVEL.has(key))
+        .map(
+          (key): ValidationFinding => ({
+            severity: "error",
+            type,
+            resourceId,
+            rule: "voice-provider-schema",
+            message:
+              `Cartesia voice rejects top-level "${key}" — ` +
+              cartesiaRemedy(key),
+            fieldPath: `${fieldPath}.${key}`,
+          }),
+        );
+    case "11labs":
+      return keys
+        .filter((key) => ELEVENLABS_FORBIDDEN_TOP_LEVEL.has(key))
+        .map(
+          (key): ValidationFinding => ({
+            severity: "error",
+            type,
+            resourceId,
+            rule: "voice-provider-schema",
+            message:
+              `11labs voice rejects "${key}" — that's a Cartesia path. ` +
+              `Move speed to top-level voice.speed (0.7–1.2)`,
+            fieldPath: `${fieldPath}.${key}`,
+          }),
+        );
+    default:
+      return [];
+  }
+}
+
+// Runs checkVoiceBlock over every voice block this validator knows about:
+// `assistant.voice`, `squad.membersOverrides.voice`, and
+// `squad.members[i].assistantOverrides.voice`.
+function checkVoiceSchemas(
+  resources: LoadedResources,
+): ValidationFinding[] {
+  const findings: ValidationFinding[] = [];
+
+  for (const assistant of resources.assistants) {
+    findings.push(
+      ...checkVoiceBlock(
+        "assistants",
+        assistant.resourceId,
+        (assistant.data as { voice?: unknown }).voice,
+        "voice",
+      ),
+    );
+  }
+
+  for (const squad of resources.squads) {
+    const overrides = (squad.data as { membersOverrides?: unknown })
+      .membersOverrides;
+    if (overrides && typeof overrides === "object") {
+      findings.push(
+        ...checkVoiceBlock(
+          "squads",
+          squad.resourceId,
+          (overrides as { voice?: unknown }).voice,
+          "membersOverrides.voice",
+        ),
+      );
+    }
+
+    const members = (squad.data as { members?: unknown }).members;
+    if (!Array.isArray(members)) continue;
+    members.forEach((m, idx) => {
+      if (!m || typeof m !== "object") return;
+      const mo = (m as { assistantOverrides?: unknown }).assistantOverrides;
+      if (!mo || typeof mo !== "object") return;
+      findings.push(
+        ...checkVoiceBlock(
+          "squads",
+          squad.resourceId,
+          (mo as { voice?: unknown }).voice,
+          `members[${idx}].assistantOverrides.voice`,
+        ),
+      );
+    });
+  }
+
+  return findings;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Public entry: run all checks
+// ─────────────────────────────────────────────────────────────────────────────
+
+// Runs every check and returns the findings concatenated in check order
+// (name lengths, lockstep, prompt duplication, maxTokens floor, voice schema).
+export function validateResources(
+  resources: LoadedResources,
+): ValidationFinding[] {
+  return [
+    ...checkNameLengths(resources),
+    ...checkLockstep(resources),
+    ...checkPromptDuplications(resources),
+    ...checkMaxTokensFloor(resources),
+    ...checkVoiceSchemas(resources),
+  ];
+}
+
+// Format a single finding as a human-friendly line for the CLI output.
+export function formatFinding(f: ValidationFinding): string {
+  const icon = f.severity === "error" ? "❌" : "⚠️ ";
+  const where = f.fieldPath
+    ? `${f.type}/${f.resourceId} (${f.fieldPath})`
+    : `${f.type}/${f.resourceId}`;
+  return ` ${icon} [${f.rule}] ${where}: ${f.message}`;
+}
+
+// Group findings into a summary block. Returns the formatted text: a header
+// with error/warning counts followed by one formatted line per finding, in
+// the order given.
+export function summarizeFindings(findings: ValidationFinding[]): string {
+  if (findings.length === 0) return "✅ No validation issues.";
+  const errors = findings.filter((f) => f.severity === "error");
+  const warns = findings.filter((f) => f.severity === "warn");
+  const lines: string[] = [];
+  lines.push(`📋 Validation: ${errors.length} error(s), ${warns.length} warning(s)`);
+  for (const f of findings) lines.push(formatFinding(f));
+  return lines.join("\n");
+}
diff --git a/tests/validate.test.ts b/tests/validate.test.ts
new file mode 100644
index 0000000..805180c
--- /dev/null
+++ b/tests/validate.test.ts
@@ -0,0 +1,365 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { validateResources } from "../src/validate.ts";
+import type { LoadedResources, ResourceFile } from "../src/types.ts";
+
+// Stack D — validator regression coverage. Each spec exercises one rule
+// against a minimal in-memory fixture so the validator's signal/noise can
+// be pinned without filesystem fixtures.
+
+// Plain-object payload carried by every fixture below.
+type ResourceData = Record<string, unknown>;
+
+// LoadedResources with every collection empty; each spec pushes only the
+// fixtures it needs.
+function emptyResources(): LoadedResources {
+  return {
+    tools: [],
+    structuredOutputs: [],
+    assistants: [],
+    squads: [],
+    personalities: [],
+    scenarios: [],
+    simulations: [],
+    simulationSuites: [],
+    evals: [],
+  };
+}
+
+// Shared fixture builder: wraps `data` in a ResourceFile whose filePath is a
+// fake `/fake/<resourceId>.<ext>` location (nothing is read from disk).
+function makeResource(
+  resourceId: string,
+  data: ResourceData,
+  ext: string,
+): ResourceFile<ResourceData> {
+  return { resourceId, filePath: `/fake/${resourceId}.${ext}`, data };
+}
+
+function makeAssistant(
+  resourceId: string,
+  data: ResourceData,
+): ResourceFile<ResourceData> {
+  return makeResource(resourceId, data, "md");
+}
+
+function makeTool(
+  resourceId: string,
+  data: ResourceData,
+): ResourceFile<ResourceData> {
+  return makeResource(resourceId, data, "yml");
+}
+
+function makeSO(
+  resourceId: string,
+  data: ResourceData,
+): ResourceFile<ResourceData> {
+  return makeResource(resourceId, data, "yml");
+}
+
+function makeScenario(
+  resourceId: string,
+  data: ResourceData,
+): ResourceFile<ResourceData> {
+  return makeResource(resourceId, data, "yml");
+}
+
+function makeSquad(
+  resourceId: string,
+  data: ResourceData,
+): ResourceFile<ResourceData> {
+  return makeResource(resourceId, data, "yml");
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Rule: name-length (assistant + scenario evaluations[].structuredOutput.name)
+// ─────────────────────────────────────────────────────────────────────────────
+
+test("name-length: assistant name longer than 40 chars is flagged as error", () => {
+  const r = emptyResources();
+  r.assistants.push(
+    makeAssistant("ok-name", { name: "OK Name" }),
+    makeAssistant("too-long", {
+      name: "this-name-is-definitely-more-than-forty-characters-long",
+    }),
+  );
+
+  const findings = validateResources(r).filter((f) => f.rule === "name-length");
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0]!.severity, "error");
+  assert.equal(findings[0]!.resourceId, "too-long");
+  assert.equal(findings[0]!.fieldPath, "name");
+});
+
+test("name-length: scenario evaluations[].structuredOutput.name >40 chars flagged", () => {
+  const r = emptyResources();
+  r.scenarios.push(
+    makeScenario("scenario-1", {
+      evaluations: [
+        { structuredOutput: { name: "ok_short" } },
+        {
+          structuredOutput: {
+            name: "assistant_attempted_live_conversation_after_voicemail",
+          },
+        },
+      ],
+    }),
+  );
+
+  const findings = validateResources(r).filter((f) => f.rule === "name-length");
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0]!.fieldPath, "evaluations[1].structuredOutput.name");
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Rule: SO ↔ assistant lockstep
+// ─────────────────────────────────────────────────────────────────────────────
+
+test("so-assistant-lockstep: SO declares assistant but assistant doesn't list SO", () => {
+  const r = emptyResources();
+  r.assistants.push(
+    makeAssistant("agent-a", {
+      artifactPlan: { structuredOutputIds: [] },
+    }),
+  );
+  r.structuredOutputs.push(
+    makeSO("customer-data", { assistant_ids: ["agent-a"] }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "so-assistant-lockstep",
+  );
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0]!.severity, "warn");
+  assert.equal(findings[0]!.type, "structuredOutputs");
+});
+
+test("so-assistant-lockstep: assistant declares SO but SO doesn't list assistant", () => {
+  const r = emptyResources();
+  r.assistants.push(
+    makeAssistant("agent-a", {
+      artifactPlan: { structuredOutputIds: ["customer-data"] },
+    }),
+  );
+  r.structuredOutputs.push(makeSO("customer-data", { assistant_ids: [] }));
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "so-assistant-lockstep",
+  );
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0]!.type, "assistants");
+});
+
+test("so-assistant-lockstep: bidirectional declaration produces no findings", () => {
+  const r = emptyResources();
+  r.assistants.push(
+    makeAssistant("agent-a", {
+      artifactPlan: { structuredOutputIds: ["customer-data"] },
+    }),
+  );
+  r.structuredOutputs.push(
+    makeSO("customer-data", { assistant_ids: ["agent-a"] }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "so-assistant-lockstep",
+  );
+  assert.equal(findings.length, 0);
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Rule: prompt-duplicate-h1 / prompt-duplicate-block
+// ─────────────────────────────────────────────────────────────────────────────
+
+test("prompt-duplicate-h1: same H1 appearing twice flagged", () => {
+  const r = emptyResources();
+  const promptWithDup = `# Identity
+
+You are foo.
+
+# Identity
+
+You are foo again.`;
+  r.assistants.push(
+    makeAssistant("dup-h1", {
+      model: { messages: [{ role: "system", content: promptWithDup }] },
+    }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "prompt-duplicate-h1",
+  );
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0]!.severity, "warn");
+});
+
+test("prompt-duplicate-block: CONTINUITY ON ENTRY appearing twice flagged", () => {
+  const r = emptyResources();
+  const promptWithDup = `# Identity
+
+CONTINUITY ON ENTRY: do X.
+
+CONTINUITY ON ENTRY: do Y.`;
+  r.assistants.push(
+    makeAssistant("dup-block", {
+      model: { messages: [{ role: "system", content: promptWithDup }] },
+    }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "prompt-duplicate-block",
+  );
+  assert.equal(findings.length, 1);
+});
+
+test("prompt-duplicate: clean prompt produces no findings", () => {
+  const r = emptyResources();
+  r.assistants.push(
+    makeAssistant("clean", {
+      model: {
+        messages: [{ role: "system", content: "# Identity\n\nYou are foo." }],
+      },
+    }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) =>
+      f.rule === "prompt-duplicate-h1" || f.rule === "prompt-duplicate-block",
+  );
+  assert.equal(findings.length, 0);
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Rule: max-tokens-floor
+// ─────────────────────────────────────────────────────────────────────────────
+
+test("max-tokens-floor: assistant with maxTokens=1 and tool warns", () => {
+  const r = emptyResources();
+  r.tools.push(
+    makeTool("end-call", {
+      function: {
+        parameters: {
+          type: "object",
+          properties: {
+            reason: { type: "string", description: "why ending" },
+          },
+          required: ["reason"],
+        },
+      },
+    }),
+  );
+  r.assistants.push(
+    makeAssistant("classifier", {
+      model: { toolIds: ["end-call"], maxTokens: 1 },
+    }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "max-tokens-floor",
+  );
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0]!.severity, "warn");
+});
+
+test("max-tokens-floor: assistant with high maxTokens silent", () => {
+  const r = emptyResources();
+  r.tools.push(
+    makeTool("end-call", {
+      function: { parameters: { type: "object", properties: {} } },
+    }),
+  );
+  r.assistants.push(
+    makeAssistant("normal", {
+      model: { toolIds: ["end-call"], maxTokens: 1000 },
+    }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "max-tokens-floor",
+  );
+  assert.equal(findings.length, 0);
+});
+
+test("max-tokens-floor: assistant without tools is silent", () => {
+  const r = emptyResources();
+  r.assistants.push(
+    makeAssistant("toolless", { model: { maxTokens: 1 } }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "max-tokens-floor",
+  );
+  assert.equal(findings.length, 0);
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Rule: voice-provider-schema
+// ─────────────────────────────────────────────────────────────────────────────
+
+test("voice-provider-schema: cartesia rejects voice.speed at top level", () => {
+  const r = emptyResources();
+  r.assistants.push(
+    makeAssistant("cartesia-bad", {
+      voice: { provider: "cartesia", speed: 1.0, voiceId: "x" },
+    }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "voice-provider-schema",
+  );
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0]!.severity, "error");
+  assert.equal(findings[0]!.fieldPath, "voice.speed");
+});
+
+test("voice-provider-schema: cartesia rejects enableSsmlParsing", () => {
+  const r = emptyResources();
+  r.assistants.push(
+    makeAssistant("cartesia-ssml", {
+      voice: { provider: "cartesia", enableSsmlParsing: true, voiceId: "x" },
+    }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "voice-provider-schema",
+  );
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0]!.fieldPath, "voice.enableSsmlParsing");
+});
+
+test("voice-provider-schema: cartesia accepts generationConfig.speed", () => {
+  const r = emptyResources();
+  r.assistants.push(
+    makeAssistant("cartesia-good", {
+      voice: {
+        provider: "cartesia",
+        voiceId: "x",
+        generationConfig: { speed: 1.1 },
+      },
+    }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "voice-provider-schema",
+  );
+  assert.equal(findings.length, 0);
+});
+
+test("voice-provider-schema: 11labs rejects generationConfig", () => {
+  const r = emptyResources();
+  r.assistants.push(
+    makeAssistant("eleven-bad", {
+      voice: {
+        provider: "11labs",
+        voiceId: "x",
+        generationConfig: { speed: 1.0 },
+      },
+    }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "voice-provider-schema",
+  );
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0]!.fieldPath, "voice.generationConfig");
+});
+
+test("voice-provider-schema: cartesia membersOverrides.voice in squad checked", () => {
+  const r = emptyResources();
+  r.squads.push(
+    makeSquad("squad-bad", {
+      membersOverrides: { voice: { provider: "cartesia", speed: 1.0 } },
+    }),
+  );
+
+  const findings = validateResources(r).filter(
+    (f) => f.rule === "voice-provider-schema",
+  );
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0]!.fieldPath, "membersOverrides.voice.speed");
+});