diff --git a/AGENTS.md b/AGENTS.md index 515d724..d3bb732 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -751,6 +751,7 @@ npm run push -- --dry-run # Preview without applying an npm run push -- --strict # Abort push if any validator returns an error npm run apply -- # Pull then push (full sync) npm run validate -- # Lint resources locally (fails fast on schema drift) +npm run sim -- --suite --target # Run a simulation suite against an assistant/squad # Testing npm run call -- -a # Call an assistant via WebSocket diff --git a/improvements.md b/improvements.md index 97cb23f..8d3a1b5 100644 --- a/improvements.md +++ b/improvements.md @@ -67,7 +67,7 @@ you which stack PR closes the row.** | 13 | `.agent/` and `.claude/handoffs/` not gitignored | `git add -A` sweeps PII handoff scratch | None | RESOLVED 2026-04-30 (Stack A) | | 14 | Multi-file push undocumented | Discoverability | None | RESOLVED 2026-04-30 (Stack A) | | 15 | Scoped push rewrites entire state file | Pre-existing drift sweeps into focused commits | #4 | Open (Stack J planned) | -| 16 | No CLI runner for simulation suites | Engine pushes them, can't run them | None | Open (Stack E planned) | +| 16 | No CLI runner for simulation suites | Engine pushes them, can't run them | None | RESOLVED 2026-04-30 (Stack E) | | 17 | State file key-order churn produces noisy diffs | Reorderings hide real changes | None | RESOLVED 2026-04-30 (Stack B) | | 18 | Structured-output `name` capped at 40 chars (no warning) | Push fails partway after partial application | None | RESOLVED 2026-04-30 (Stack D) | | 19 | No `maxTokens` floor warning for tool-using assistants | `maxTokens: 1` bricks the assistant silently | None | RESOLVED 2026-04-30 (Stack D) | diff --git a/package.json b/package.json index 28c69f7..ef33816 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "cleanup": "tsx src/cleanup-cmd.ts", "eval": "tsx src/eval.ts", "validate": "tsx src/validate-cmd.ts", + "sim": "tsx src/sim-cmd.ts", "build": "tsc 
/** Print CLI usage and examples to stderr. */
function printUsage(): void {
  console.error(
    [
      "Usage:",
      "  npm run sim -- <org> --suite <name> --target <name>",
      "  npm run sim -- <org> --simulations <a>,<b> --target <name>",
      "",
      "Options:",
      "  --suite <name>          Run an entire simulation suite by local resource name",
      "  --simulations <a,b>     Run one or more simulations by comma-separated local names",
      "  --target <name>         Local assistant or squad name (resolves to UUID via state)",
      "  --assistant <name>      Like --target, but always treated as an assistant",
      "  --squad <name>          Like --target, but always treated as a squad",
      "  --transport voice|chat  Transport (default: voice; chat is faster/cheaper)",
      "  --iterations N          Override default iteration count (positive integer)",
      "  --watch / --no-watch    Tail status until completion (default: on)",
      "  -h, --help              Show this help",
      "",
      "Examples:",
      "  npm run sim -- my-org --suite booking-tests --target intake-agent",
      "  npm run sim -- my-org --simulations happy-path,edge-case --target main-agent --transport chat",
    ].join("\n"),
  );
}

/** Raw command-line options, before any name is resolved against the state file. */
interface ParsedArgs {
  /** Org / environment slug (first positional argument). */
  env: string;
  suite?: string;
  /** Comma-separated simulation names, exactly as typed. */
  simulations?: string;
  /** Value of `--target`; `main` decides whether it names an assistant or a squad. */
  target?: string;
  /** Value of the explicit `--assistant` flag (never reinterpreted as a squad). */
  assistant?: string;
  /** Value of the explicit `--squad` flag. */
  squad?: string;
  transport?: "voice" | "chat";
  iterations?: number;
  watch: boolean;
}

/** Org names are lowercase alphanumeric slugs that may contain inner hyphens. */
const ORG_SLUG_RE = /^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/;

/**
 * Parse the sim CLI arguments.
 *
 * Any usage error prints a message to stderr and exits the process with
 * code 1; `--help` / `-h` (in any position) prints usage and exits with 0.
 *
 * @param argv Arguments after the script name. Defaults to
 *   `process.argv.slice(2)`; injectable so the parser can be unit-tested.
 * @returns The parsed options. `watch` defaults to `true`.
 */
function parseArgs(
  argv: readonly string[] = process.argv.slice(2),
): ParsedArgs {
  function fail(message: string): never {
    console.error(`❌ ${message}`);
    return process.exit(1);
  }

  // Help wins over everything else, including a missing or invalid org name.
  if (argv.includes("--help") || argv.includes("-h")) {
    printUsage();
    return process.exit(0);
  }

  const [env, ...rest] = argv;
  if (!env) {
    printUsage();
    return process.exit(1);
  }
  if (!ORG_SLUG_RE.test(env)) return fail(`Invalid org name: ${env}`);

  const parsed: ParsedArgs = { env, watch: true };
  let cursor = 0;

  // Consume the value following a flag. A missing value, or one that looks
  // like another flag, is an error rather than being silently swallowed.
  const takeValue = (flag: string): string => {
    const value = rest[cursor];
    if (value === undefined || value.startsWith("--")) {
      return fail(`${flag} requires a value`);
    }
    cursor += 1;
    return value;
  };

  while (cursor < rest.length) {
    const flag = rest[cursor++];
    switch (flag) {
      case "--suite":
        parsed.suite = takeValue(flag);
        break;
      case "--simulations":
        parsed.simulations = takeValue(flag);
        break;
      case "--target":
        parsed.target = takeValue(flag);
        break;
      case "--assistant":
        parsed.assistant = takeValue(flag);
        break;
      case "--squad":
        parsed.squad = takeValue(flag);
        break;
      case "--transport": {
        const value = takeValue(flag);
        if (value !== "voice" && value !== "chat") {
          return fail(`--transport must be "voice" or "chat" (got "${value}")`);
        }
        parsed.transport = value;
        break;
      }
      case "--iterations": {
        const value = takeValue(flag);
        // parseInt would accept "3abc" and "-2"; require a plain positive integer.
        if (!/^\d+$/.test(value) || Number(value) < 1) {
          return fail(
            `--iterations requires a positive integer (got "${value}")`,
          );
        }
        parsed.iterations = Number(value);
        break;
      }
      case "--watch":
        parsed.watch = true;
        break;
      case "--no-watch":
        parsed.watch = false;
        break;
      default:
        return fail(`Unknown argument: ${flag}`);
    }
  }

  const targetFlags = [parsed.target, parsed.assistant, parsed.squad].filter(
    (value) => value !== undefined,
  );
  if (targetFlags.length > 1) {
    return fail("Specify only one of --target, --assistant, or --squad");
  }

  return parsed;
}

/**
 * CLI entry point: resolve the selection and target against local state,
 * start the simulation run, print the summary and set the exit code.
 *
 * Exits with code 1 when any result failed or the run itself reports the
 * status "failed". Resolution and API errors are thrown to the caller.
 */
async function main(): Promise<void> {
  const args = parseArgs();
  const cfg = loadEnvFile(args.env);
  const state = loadStateFile(args.env);

  // Only a bare --target is disambiguated: it becomes a squad when the name
  // exists in state as a squad and not as an assistant. Explicit
  // --assistant / --squad are passed through untouched.
  let assistant = args.assistant;
  let squad = args.squad;
  if (args.target !== undefined) {
    const isSquadOnly =
      state.squads?.[args.target] !== undefined &&
      state.assistants?.[args.target] === undefined;
    if (isSquadOnly) squad = args.target;
    else assistant = args.target;
  }

  console.log(
    "═══════════════════════════════════════════════════════════════",
  );
  console.log(`🧪 Vapi GitOps Sim Runner — Environment: ${args.env}`);
  console.log(`   API: ${cfg.baseUrl}`);
  console.log(
    "═══════════════════════════════════════════════════════════════\n",
  );

  const selection = resolveSelection(state, {
    suite: args.suite,
    simulations: args.simulations,
  });
  const target = resolveTarget(state, { assistant, squad });

  const summary = await runSimulation(cfg, selection, target, {
    watch: args.watch,
    iterations: args.iterations,
    transport: args.transport,
  });

  console.log(`\n${formatSummary(summary)}\n`);

  // A run can end in status "failed" without any per-result failures
  // (e.g. it never produced results); that must not exit 0.
  if (summary.fail > 0 || summary.status === "failed") {
    console.error(
      `❌ Simulation run failed (${summary.fail} fail / ${summary.pass} pass)`,
    );
    process.exit(1);
  }
  if (!args.watch) {
    // Nothing was awaited, so there is no verdict to report yet.
    console.log(
      `ℹ️  Run ${summary.runId} started; results were not awaited (--no-watch).`,
    );
    return;
  }
  console.log("✅ Simulation run passed.");
}
/** Connection settings for one environment, as returned by `loadEnvFile`. */
export interface SimEnv {
  env: string;
  token: string;
  baseUrl: string;
}

/** The assistant or squad a simulation run is executed against. */
export interface SimTarget {
  type: "assistant" | "squad";
  id: string; // platform UUID
  resourceName: string; // local name the UUID was resolved from
}

/** What to run: one suite entry, or one entry per named simulation. */
export interface SimSelection {
  entries: Array<
    | { type: "simulationSuite"; simulationSuiteId: string }
    | { type: "simulation"; simulationId: string }
  >;
  label: string; // human-friendly summary, e.g. "suite booking-tests"
}

/** Options for `runSimulation`. Every field is optional. */
export interface SimRunOptions {
  /** Poll until the run reaches a terminal status (default: true). */
  watch?: boolean;
  /** Sent as `iterations` in the request body when defined. */
  iterations?: number;
  /** "chat" selects the `vapi.webchat` provider; anything else `vapi.websocket`. */
  transport?: "voice" | "chat";
  /** Delay between status polls in ms (default: 3000). */
  pollIntervalMs?: number;
  /** Give up polling after this many ms (default: 600000). */
  timeoutMs?: number;
}

/** Outcome counts and timing for one simulation run. */
export interface SimRunSummary {
  runId: string;
  status: string;
  pass: number;
  fail: number;
  skipped: number;
  durationMs: number;
}

/**
 * Load the API token and base URL for an environment.
 *
 * Reads `.env.<env>`, `.env.<env>.local` and `.env.local` (in that order)
 * from the repository root; for a key present in several files the FIRST
 * file wins. `process.env` takes precedence over every file.
 *
 * @throws Error when no `VAPI_TOKEN` is found anywhere.
 */
export function loadEnvFile(env: string): SimEnv {
  const candidates = [`.env.${env}`, `.env.${env}.local`, ".env.local"].map(
    (name) => join(BASE_DIR, name),
  );

  const fileVars = new Map<string, string>();
  for (const file of candidates) {
    if (!existsSync(file)) continue;
    for (const rawLine of readFileSync(file, "utf-8").split("\n")) {
      const line = rawLine.trim();
      if (line === "" || line.startsWith("#")) continue;
      const eq = line.indexOf("=");
      if (eq === -1) continue;

      const key = line.slice(0, eq).trim();
      let value = line.slice(eq + 1).trim();
      // Strip one pair of matching quotes; a lone quote character is kept.
      const quote = value[0];
      if (
        value.length >= 2 &&
        (quote === '"' || quote === "'") &&
        value.endsWith(quote)
      ) {
        value = value.slice(1, -1);
      }
      if (!fileVars.has(key)) fileVars.set(key, value);
    }
  }

  // `||` on purpose: an empty value falls through to the next source.
  const token = process.env.VAPI_TOKEN || fileVars.get("VAPI_TOKEN");
  const baseUrl =
    process.env.VAPI_BASE_URL ||
    fileVars.get("VAPI_BASE_URL") ||
    "https://api.vapi.ai";
  if (!token) {
    throw new Error(
      `VAPI_TOKEN not found. Create .env.${env} with VAPI_TOKEN=your-token`,
    );
  }
  return { env, token, baseUrl };
}

/**
 * Read `.vapi-state.<env>.json` from the repository root.
 *
 * Only the top-level shape (a JSON object) is checked; individual sections
 * are validated lazily by the resolvers.
 *
 * @throws Error when the file is missing, is not valid JSON, or is not an object.
 */
export function loadStateFile(env: string): StateFile {
  const fileName = `.vapi-state.${env}.json`;
  const stateFile = join(BASE_DIR, fileName);
  if (!existsSync(stateFile)) {
    throw new Error(
      `State file not found: ${fileName}. Run 'npm run pull -- ${env} --bootstrap' first.`,
    );
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(stateFile, "utf-8"));
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`State file ${fileName} is not valid JSON: ${reason}`);
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new Error(`State file ${fileName} must contain a JSON object`);
  }
  return parsed as StateFile;
}

/**
 * Extract the platform UUID from a state value. Accepts both the bare-string
 * shape and an object carrying a string `uuid` field; anything else yields
 * `undefined`.
 */
function stateValueToUuid(value: unknown): string | undefined {
  if (typeof value === "string") return value;
  if (
    value &&
    typeof value === "object" &&
    typeof (value as { uuid?: unknown }).uuid === "string"
  ) {
    return (value as { uuid: string }).uuid;
  }
  return undefined;
}

/**
 * Look up `name` in one state section. A missing section or a name that is
 * not an own key of the section yields `undefined` instead of throwing.
 */
function lookupUuid(section: unknown, name: string): string | undefined {
  if (section === null || typeof section !== "object") return undefined;
  if (!Object.prototype.hasOwnProperty.call(section, name)) return undefined;
  return stateValueToUuid((section as Record<string, unknown>)[name]);
}

/**
 * Resolve the run target (assistant or squad) from its local name.
 *
 * @throws Error when both or neither of `assistant` / `squad` are given, or
 *   when the name is not present in state.
 */
export function resolveTarget(
  state: StateFile,
  args: { assistant?: string; squad?: string },
): SimTarget {
  if (args.assistant && args.squad) {
    throw new Error("Specify --target as an assistant OR a squad, not both");
  }
  if (args.assistant) {
    const id = lookupUuid(state.assistants, args.assistant);
    if (!id) {
      throw new Error(
        `Assistant "${args.assistant}" not found in state. Run 'npm run pull -- <org>' or check the resource name.`,
      );
    }
    return { type: "assistant", id, resourceName: args.assistant };
  }
  if (args.squad) {
    const id = lookupUuid(state.squads, args.squad);
    if (!id) {
      throw new Error(
        `Squad "${args.squad}" not found in state. Run 'npm run pull -- <org>' or check the resource name.`,
      );
    }
    return { type: "squad", id, resourceName: args.squad };
  }
  throw new Error("Must specify --target <name>");
}

/**
 * Resolve what to run: a single suite, or a comma-separated list of
 * simulations (names are trimmed, empty items are dropped).
 *
 * @throws Error when both or neither option is given, when the list is
 *   empty, or when any name is not present in state.
 */
export function resolveSelection(
  state: StateFile,
  args: { suite?: string; simulations?: string },
): SimSelection {
  if (args.suite && args.simulations) {
    throw new Error("Specify --suite OR --simulations, not both");
  }
  if (args.suite) {
    const id = lookupUuid(state.simulationSuites, args.suite);
    if (!id) {
      throw new Error(
        `Simulation suite "${args.suite}" not found in state. Push the suite first or check the name.`,
      );
    }
    return {
      entries: [{ type: "simulationSuite", simulationSuiteId: id }],
      label: `suite ${args.suite}`,
    };
  }
  if (args.simulations) {
    const names = args.simulations
      .split(",")
      .map((name) => name.trim())
      .filter(Boolean);
    if (names.length === 0) {
      throw new Error(
        "--simulations requires at least one comma-separated simulation name",
      );
    }
    const entries: SimSelection["entries"] = names.map((name) => {
      const id = lookupUuid(state.simulations, name);
      if (!id) {
        throw new Error(
          `Simulation "${name}" not found in state. Push first or check the name.`,
        );
      }
      return { type: "simulation", simulationId: id };
    });
    return { entries, label: `simulations ${names.join(", ")}` };
  }
  throw new Error("Must specify --suite <name> or --simulations <a,b>");
}

/** Fields this module reads from the run endpoints; other keys are ignored. */
interface SimRunResponse {
  id?: string;
  evalRunId?: string;
  status?: string;
  results?: Array<{ status?: string; isSkipped?: boolean }>;
  endedReason?: string;
  endedMessage?: string;
  cost?: number;
  [key: string]: unknown;
}

const POLL_INTERVAL_MS = 3000;
const POLL_TIMEOUT_MS = 600_000;

/** Run statuses after which polling stops. */
const TERMINAL_STATUSES: ReadonlySet<string> = new Set([
  "ended",
  "failed",
  "completed",
]);

/**
 * Perform an authenticated JSON request against the configured API.
 *
 * @throws Error carrying the HTTP status and response text on a non-2xx reply.
 */
async function fetchJson(
  cfg: SimEnv,
  method: "GET" | "POST",
  endpoint: string,
  body?: unknown,
): Promise<unknown> {
  // Tolerate a trailing slash on the configured base URL ("…/" + "/eval/…").
  const url = `${cfg.baseUrl.replace(/\/+$/, "")}${endpoint}`;
  const response = await fetch(url, {
    method,
    headers: {
      Authorization: `Bearer ${cfg.token}`,
      "Content-Type": "application/json",
    },
    ...(body !== undefined ? { body: JSON.stringify(body) } : {}),
  });
  if (!response.ok) {
    const text = await response.text();
    throw new Error(`API ${method} ${endpoint} → ${response.status}: ${text}`);
  }
  return response.json();
}

/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Start a simulation run via `POST /eval/simulation/run` and, unless
 * `options.watch` is false, poll `GET /eval/simulation/run/<id>` until the
 * status is "ended", "failed" or "completed".
 *
 * @returns Counts taken from the last response's `results`: non-skipped
 *   results with status "pass" count as pass, every other non-skipped result
 *   as fail. With `watch: false` the counts come from the creation response.
 * @throws Error when the API returns no run id, on any non-2xx response, or
 *   when polling exceeds the timeout without reaching a terminal status.
 */
export async function runSimulation(
  cfg: SimEnv,
  selection: SimSelection,
  target: SimTarget,
  options: SimRunOptions = {},
): Promise<SimRunSummary> {
  const {
    watch = true,
    iterations,
    transport,
    pollIntervalMs = POLL_INTERVAL_MS,
    timeoutMs = POLL_TIMEOUT_MS,
  } = options;

  const body: Record<string, unknown> = {
    simulations: selection.entries,
    target:
      target.type === "assistant"
        ? { type: "assistant", assistantId: target.id }
        : { type: "squad", squadId: target.id },
    transport: {
      provider: transport === "chat" ? "vapi.webchat" : "vapi.websocket",
    },
  };
  if (iterations !== undefined) body.iterations = iterations;

  console.log(
    `🧪 Starting simulation run — ${selection.label} → ${target.type}/${target.resourceName}`,
  );
  const start = Date.now();
  const raw = await fetchJson(cfg, "POST", "/eval/simulation/run", body);
  // A non-object reply (e.g. `null`) is treated as a response with no keys.
  const created: SimRunResponse =
    raw !== null && typeof raw === "object" ? (raw as SimRunResponse) : {};
  const runId = created.evalRunId ?? created.id;
  if (!runId) {
    throw new Error(
      `POST /eval/simulation/run returned no runId (keys: ${Object.keys(created).join(", ")})`,
    );
  }
  console.log(`   Run ID: ${runId}`);

  let last = created;
  if (watch) {
    const runPath = `/eval/simulation/run/${encodeURIComponent(runId)}`;
    // Track completion explicitly: a run that finishes on the final poll
    // before the deadline must not be reported as a timeout.
    let finished = false;
    while (!finished && Date.now() - start < timeoutMs) {
      await sleep(pollIntervalMs);
      last = (await fetchJson(cfg, "GET", runPath)) as SimRunResponse;
      const status = last.status ?? "running";
      process.stdout.write(`\r   Status: ${status}        `);
      finished = TERMINAL_STATUSES.has(status);
    }
    process.stdout.write("\n");
    if (!finished) {
      throw new Error(
        `Simulation run ${runId} timed out after ${timeoutMs / 1000}s`,
      );
    }
  }

  let pass = 0;
  let fail = 0;
  let skipped = 0;
  for (const result of Array.isArray(last.results) ? last.results : []) {
    if (result.isSkipped === true) skipped += 1;
    else if (result.isSkipped) continue; // truthy but not `true`: counted nowhere
    else if (result.status === "pass") pass += 1;
    else fail += 1;
  }

  return {
    runId,
    status: last.status ?? "unknown",
    pass,
    fail,
    skipped,
    durationMs: Date.now() - start,
  };
}

/** Render a run summary as a short multi-line, human-readable report. */
export function formatSummary(summary: SimRunSummary): string {
  const total = summary.pass + summary.fail + summary.skipped;
  const skippedNote =
    summary.skipped > 0 ? `, ${summary.skipped} skipped` : "";
  return [
    `📊 Simulation summary (run ${summary.runId})`,
    `   Status: ${summary.status}`,
    `   Results: ${summary.pass}/${total} pass, ${summary.fail} fail${skippedNote}`,
    `   Duration: ${(summary.durationMs / 1000).toFixed(1)}s`,
  ].join("\n");
}
assistants: { a: "x" }, + squads: { b: "y" }, + }); + assert.throws( + () => resolveTarget(state, { assistant: "a", squad: "b" }), + /assistant OR a squad, not both/, + ); +}); + +test("resolveSelection: resolves suite by local name", () => { + const state = makeState({ simulationSuites: { "booking-tests": "uuid-s-1" } }); + const sel = resolveSelection(state, { suite: "booking-tests" }); + assert.equal(sel.entries.length, 1); + assert.deepEqual(sel.entries[0], { + type: "simulationSuite", + simulationSuiteId: "uuid-s-1", + }); + assert.match(sel.label, /booking-tests/); +}); + +test("resolveSelection: resolves comma-separated simulations", () => { + const state = makeState({ + simulations: { "happy-path": "uuid-h", "edge-case": "uuid-e" }, + }); + const sel = resolveSelection(state, { + simulations: "happy-path, edge-case", + }); + assert.equal(sel.entries.length, 2); + assert.deepEqual(sel.entries[0], { + type: "simulation", + simulationId: "uuid-h", + }); + assert.deepEqual(sel.entries[1], { + type: "simulation", + simulationId: "uuid-e", + }); +}); + +test("resolveSelection: throws when suite not in state", () => { + const state = makeState(); + assert.throws( + () => resolveSelection(state, { suite: "missing" }), + /Simulation suite "missing" not found in state/, + ); +}); + +test("resolveSelection: rejects both suite and simulations simultaneously", () => { + const state = makeState({ + simulationSuites: { a: "x" }, + simulations: { b: "y" }, + }); + assert.throws( + () => resolveSelection(state, { suite: "a", simulations: "b" }), + /Specify --suite OR --simulations/, + ); +}); + +test("resolveTarget: handles forward-compat ResourceState shape (Stack F)", () => { + // Stack F migrates state values from `string` to `{uuid: string, ...}`. + // The resolver must accept both shapes so this stack lands cleanly + // before F or after. 
+ const state = { + credentials: {}, + assistants: { + "future-agent": { + uuid: "uuid-future", + lastPulledHash: "abc123", + } as unknown as string, + }, + structuredOutputs: {}, + tools: {}, + squads: {}, + personalities: {}, + scenarios: {}, + simulations: {}, + simulationSuites: {}, + evals: {}, + } as StateFile; + const target = resolveTarget(state, { assistant: "future-agent" }); + assert.equal(target.id, "uuid-future"); +});