From ddd5943cc5b19b71b58916ff4688a83cfc83bb70 Mon Sep 17 00:00:00 2001 From: Bulat Yapparov Date: Mon, 22 Jun 2026 04:48:25 +0100 Subject: [PATCH 1/4] feat(run): emit tool_catalog NDJSON event at session start (#85) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emits a single `tool_catalog` event immediately after `session_start` and before the first model turn so the server-side completion gate can structurally verify that required MCP tools (e.g. `record_finding`) were actually exposed to the model — without string-matching prose output. Changes: - `src/cli/cmd/tool-catalog.ts` (new): `buildToolCatalogItems()` collects builtin tool IDs via `ToolRegistry.ids()` and MCP entries via the new `MCP.toolEntries()`, returning `{ tools, skills }` with injectable deps for unit testing. - `src/mcp/index.ts`: adds `MCP.toolEntries()` — lightweight sibling to `tools()` that returns `{ toolKey, serverName }[]` without building full AI-SDK tool objects; used by `buildToolCatalogItems()`. - `src/skill/skill.ts`: extends `Skill.Info` with optional `version` field threaded from SKILL.md frontmatter; `null` when not declared. - `src/cli/cmd/run.ts`: `await buildToolCatalogItems()` + `emit("tool_catalog", ...)` inserted between `session_start` and the first `loop()` call. - `test/tool-catalog.test.ts` (new, TDD): 9 tests covering builtin/mcp tool shape, the "tool exposed vs missing" consumer gate, skill version presence/absence, and skills/tools separation. - `EVENTS.md`: documents the new event schema including the version gap note. Skill version gap: `Skill.Info.version` is now threaded through from frontmatter. Skills without a `version` field in their SKILL.md emit `version: null`. Closes #85 Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_0187MsfK1upr6K2BKVbmaebQ --- EVENTS.md | 37 ++++ packages/cli/src/cli/cmd/run.ts | 12 ++ packages/cli/src/cli/cmd/tool-catalog.ts | 113 +++++++++++ packages/cli/src/mcp/index.ts | 42 ++++ packages/cli/src/skill/skill.ts | 9 + packages/cli/test/tool-catalog.test.ts | 239 +++++++++++++++++++++++ 6 files changed, 452 insertions(+) create mode 100644 packages/cli/src/cli/cmd/tool-catalog.ts create mode 100644 packages/cli/test/tool-catalog.test.ts diff --git a/EVENTS.md b/EVENTS.md index 45a6ff3..5ed280c 100644 --- a/EVENTS.md +++ b/EVENTS.md @@ -14,6 +14,43 @@ The schema is versioned via `session_start.schemaVersion`. This document describ ## Lifecycle Events +### `tool_catalog` + +Emitted once per session, immediately after `session_start` and before the first model turn (present even for zero-turn or immediately-failing runs). Lists every tool and skill that was resolved for the session so consumers can structurally verify tool exposure without string-matching the model's prose. + +```json +{ + "type": "tool_catalog", + "sessionID": "ses_…", + "timestamp": 1741500000000, + "tools": [ + { "name": "record_finding", "source": "mcp", "server": "aictrl" }, + { "name": "record_review_completed", "source": "mcp", "server": "aictrl" }, + { "name": "bash", "source": "builtin" }, + { "name": "read", "source": "builtin" } + ], + "skills": [ + { "name": "code-review", "version": "1.4.0" }, + { "name": "fullstack-code-review", "version": null } + ] +} +``` + +#### `tools[]` — builtin and MCP tools resolved for this session + +- **`name`** (string, **required**) — the tool/function name as exposed to the model. Mirrors upstream `ToolListItem.id`. For MCP tools this is `{serverName}_{toolName}` (both sanitised to `[a-zA-Z0-9_-]`). +- **`source`** (string, **required**) — `"builtin"` for tools built into the CLI; `"mcp"` for tools provided by a connected MCP server. +- **`server`** (string, optional) — the MCP server (client name from config) that provides this tool. Present only when `source` is `"mcp"`. + +`description` and `parameters` are intentionally omitted to keep the event lean. The primary consumer only needs `name` + `source` to verify tool exposure. + +#### `skills[]` — skill packs resolved for this session (separate from `tools[]`) + +- **`name`** (string, **required**) — skill name from `SKILL.md` frontmatter. +- **`version`** (string or null, **required**) — version string from the `version` field in `SKILL.md` frontmatter. `null` when no version is declared. + +> **Use case:** The server-side completion gate (aictrl-dev/aictrl #3216) uses `tools[]` to verify that `record_finding` and `record_review_completed` were actually in the model's function list at dispatch time, and `skills[]` to record which skill version produced the review. Detects the "silent success" failure mode where a missing MCP server lets a run complete green without ever having the review tools available. + ### `session_start` Emitted once when the session begins. diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts index a4cfb2b..86f9d91 100644 --- a/packages/cli/src/cli/cmd/run.ts +++ b/packages/cli/src/cli/cmd/run.ts @@ -4,6 +4,7 @@ import { pathToFileURL } from "bun" import { UI } from "../ui" import { cmd } from "./cmd" import { classifySessionError, SCHEMA_VERSION } from "./run.errors" +import { buildToolCatalogItems } from "./tool-catalog" import { Flag } from "../../flag/flag" import { bootstrap } from "../bootstrap" import { EOL } from "os" @@ -704,6 +705,17 @@ export const RunCommand = cmd({ permissions: PermissionNext.merge(agentInfo.permission, rules), }) + // Emit the resolved tool catalog (builtin + MCP tools) and available + // skills before the first model turn. Enables structural detection of + // "tool was not exposed" failure modes (issue #85). + await buildToolCatalogItems() + .then(({ tools, skills }) => { + emit("tool_catalog", { tools, skills }) + }) + .catch(() => { + // Never let catalog collection block or crash the session. + }) + const loopDone = loop() .then(() => { emit("session_complete", { diff --git a/packages/cli/src/cli/cmd/tool-catalog.ts b/packages/cli/src/cli/cmd/tool-catalog.ts new file mode 100644 index 0000000..be4ba17 --- /dev/null +++ b/packages/cli/src/cli/cmd/tool-catalog.ts @@ -0,0 +1,113 @@ +/** + * tool-catalog.ts — builds the resolved tool/skill inventory emitted in the + * `tool_catalog` NDJSON event (issue #85). + * + * Exported as a standalone module so it can be unit-tested independently of + * the full run.ts wiring. + */ +import { ToolRegistry } from "../../tool/registry" +import { MCP } from "../../mcp" +import { Skill } from "../../skill" + +/** A single entry in the `tools[]` array of the `tool_catalog` event. */ +export interface ToolCatalogEntry { + /** Tool name as exposed to the model. Mirrors upstream ToolListItem.id. */ + name: string + /** Whether the tool is a builtin (local) or an MCP tool. */ + source: "builtin" | "mcp" + /** + * The MCP server name for source=mcp tools (sanitized client name). + * Omitted for builtin tools. + */ + server?: string +} + +/** A single entry in the `skills[]` array of the `tool_catalog` event. */ +export interface SkillCatalogEntry { + /** Skill name from SKILL.md frontmatter. */ + name: string + /** + * Skill version from SKILL.md frontmatter `version` field. + * null when no version is declared in frontmatter. + */ + version: string | null +} + +/** Result of buildToolCatalogItems(). */ +export interface ToolCatalogItems { + tools: ToolCatalogEntry[] + skills: SkillCatalogEntry[] +} + +/** + * Dependency injection shape, used for testing to avoid touching real MCP + * state and the real filesystem. + */ +export interface ToolCatalogDeps { + /** + * Returns builtin tool IDs (the resolved set for the current instance). + * Defaults to ToolRegistry.ids(). + */ + getBuiltinIds?: () => Promise + + /** + * Returns connected MCP tools as { toolKey, serverName } pairs. + * toolKey = the combined `serverName_toolName` key given to the model. + * Defaults to an internal implementation using MCP.clients() + listTools(). + */ + getMcpTools?: () => Promise<{ toolKey: string; serverName: string }[]> + + /** + * Returns the list of resolved skills. + * Defaults to Skill.all(). + */ + getSkills?: () => Promise +} + +/** + * Collects the resolved tool catalog and skill list for the current session. + * + * Uses ToolRegistry.ids() for builtins (the instance-level superset, without + * model-specific filtering which only applies per-turn inside resolveTools). + * Uses MCP connected clients for MCP tools. + * Uses Skill.all() for skills with optional version from frontmatter. + */ +export async function buildToolCatalogItems(deps: ToolCatalogDeps = {}): Promise { + const getBuiltinIds = deps.getBuiltinIds ?? (() => ToolRegistry.ids()) + const getMcpTools = deps.getMcpTools ?? defaultGetMcpTools + const getSkills = deps.getSkills ?? (() => Skill.all()) + + const [builtinIds, mcpTools, skills] = await Promise.all([getBuiltinIds(), getMcpTools(), getSkills()]) + + const tools: ToolCatalogEntry[] = [ + ...builtinIds.map( + (name): ToolCatalogEntry => ({ + name, + source: "builtin", + }), + ), + ...mcpTools.map( + ({ toolKey, serverName }): ToolCatalogEntry => ({ + name: toolKey, + source: "mcp", + server: serverName, + }), + ), + ] + + const skillEntries: SkillCatalogEntry[] = skills.map((s) => ({ + name: s.name, + version: s.version ?? null, + })) + + return { tools, skills: skillEntries } +} + +/** + * Default MCP tool resolver: delegates to MCP.toolEntries() which queries + * each connected client's tool list and returns { toolKey, serverName } pairs + * (matching the key format used in SessionPrompt.resolveTools). + */ +async function defaultGetMcpTools(): Promise<{ toolKey: string; serverName: string }[]> { + return MCP.toolEntries() +} diff --git a/packages/cli/src/mcp/index.ts b/packages/cli/src/mcp/index.ts index fcc83d7..e48afe0 100644 --- a/packages/cli/src/mcp/index.ts +++ b/packages/cli/src/mcp/index.ts @@ -642,6 +642,48 @@ export namespace MCP { return result } + /** + * Returns the resolved MCP tool list as lightweight entries for the + * `tool_catalog` NDJSON event (issue #85). Each entry carries the tool key + * (as exposed to the model) and the originating server name. + * + * This is intentionally separate from `tools()` to avoid the overhead of + * building full AI-SDK tool objects just for catalog reporting. + */ + export async function toolEntries(): Promise<{ toolKey: string; serverName: string }[]> { + const result: { toolKey: string; serverName: string }[] = [] + const s = await state() + const clientsSnapshot = await clients() + + const connectedClients = Object.entries(clientsSnapshot).filter( + ([clientName]) => s.status[clientName]?.status === "connected", + ) + + const toolsResults = await Promise.all( + connectedClients.map(async ([clientName, client]) => { + const toolsResult = await client.listTools().catch((e) => { + log.error("failed to get tool entries", { clientName, error: e.message }) + return undefined + }) + return { clientName, toolsResult } + }), + ) + + for (const { clientName, toolsResult } of toolsResults) { + if (!toolsResult) continue + const sanitizedClientName = clientName.replace(/[^a-zA-Z0-9_-]/g, "_") + for (const mcpTool of toolsResult.tools) { + const sanitizedToolName = mcpTool.name.replace(/[^a-zA-Z0-9_-]/g, "_") + result.push({ + toolKey: sanitizedClientName + "_" + sanitizedToolName, + serverName: clientName, + }) + } + } + + return result + } + export async function prompts() { const s = await state() const clientsSnapshot = await clients() diff --git a/packages/cli/src/skill/skill.ts b/packages/cli/src/skill/skill.ts index 14b5ef5..729e7d6 100644 --- a/packages/cli/src/skill/skill.ts +++ b/packages/cli/src/skill/skill.ts @@ -21,6 +21,8 @@ export namespace Skill { description: z.string(), location: z.string(), content: z.string(), + /** Version from SKILL.md frontmatter `version` field. undefined when not declared. */ + version: z.string().optional(), }) export type Info = z.infer @@ -79,11 +81,18 @@ export namespace Skill { dirs.add(path.dirname(match)) + // Capture version from frontmatter if present (used by tool_catalog event) + const version = + typeof md.data === "object" && md.data !== null && "version" in md.data + ? String(md.data.version) + : undefined + skills[parsed.data.name] = { name: parsed.data.name, description: parsed.data.description, location: match, content: md.content, + version, } } diff --git a/packages/cli/test/tool-catalog.test.ts b/packages/cli/test/tool-catalog.test.ts new file mode 100644 index 0000000..66e9fe0 --- /dev/null +++ b/packages/cli/test/tool-catalog.test.ts @@ -0,0 +1,239 @@ +/** + * TDD tests for the `tool_catalog` NDJSON event (issue #85). + * + * Tests verify: + * 1. buildToolCatalogItems() returns builtin tools (source: builtin) + * 2. MCP tools appear when an MCP server is attached (source: mcp, correct server) + * 3. MCP tools are absent when no MCP server is attached + * 4. Skills appear in the separate skills[] array with name + version + * 5. Skills have version: null when no version is in frontmatter + */ +import { describe, expect, test } from "bun:test" +import { Instance } from "../src/project/instance" +import { tmpdir } from "./fixture/fixture" +import { buildToolCatalogItems } from "../src/cli/cmd/tool-catalog" +import type { SkillCatalogEntry, ToolCatalogEntry } from "../src/cli/cmd/tool-catalog" +import path from "path" +import fs from "fs/promises" + +// Helper to create a minimal SKILL.md with optional version in frontmatter +async function writeSkill( + dir: string, + name: string, + description: string, + version?: string, +): Promise { + const versionLine = version ? `version: ${version}\n` : "" + const frontmatter = `---\nname: ${name}\ndescription: ${description}\n${versionLine}---` + const skillDir = path.join(dir, ".claude", "skills", name) + await fs.mkdir(skillDir, { recursive: true }) + await fs.writeFile(path.join(skillDir, "SKILL.md"), `${frontmatter}\n\nSkill content here.\n`) +} + +// Fake MCP tool provider — simulates a connected MCP server with given tools +function fakeMcpTools(serverName: string, toolNames: string[]): () => Promise<{ toolKey: string; serverName: string }[]> { + return async () => + toolNames.map((n) => ({ + toolKey: `${serverName}_${n}`, + serverName, + })) +} + +// Fake MCP provider returning no tools +function noMcpTools(): () => Promise<{ toolKey: string; serverName: string }[]> { + return async () => [] +} + +describe("tool_catalog event helpers", () => { + describe("buildToolCatalogItems — builtin tools", () => { + test("builtin tools have source=builtin and a name", async () => { + await using tmp = await tmpdir({ git: true }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const { tools } = await buildToolCatalogItems({ getMcpTools: noMcpTools() }) + + expect(tools.length).toBeGreaterThan(0) + + const builtins = tools.filter((t: ToolCatalogEntry) => t.source === "builtin") + expect(builtins.length).toBeGreaterThan(0) + + // All builtin tools must have a name + for (const t of builtins) { + expect(typeof t.name).toBe("string") + expect(t.name.length).toBeGreaterThan(0) + } + + // Known builtin tools that are always present + const names = builtins.map((t: ToolCatalogEntry) => t.name) + expect(names).toContain("bash") + expect(names).toContain("read") + expect(names).toContain("edit") + }, + }) + }) + + test("builtin tools do NOT have a server field", async () => { + await using tmp = await tmpdir({ git: true }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const { tools } = await buildToolCatalogItems({ getMcpTools: noMcpTools() }) + const builtins = tools.filter((t: ToolCatalogEntry) => t.source === "builtin") + for (const t of builtins) { + expect(t.server).toBeUndefined() + } + }, + }) + }) + }) + + describe("buildToolCatalogItems — MCP tools", () => { + test("MCP tools are absent when no MCP server is configured", async () => { + await using tmp = await tmpdir({ git: true }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const { tools } = await buildToolCatalogItems({ getMcpTools: noMcpTools() }) + const mcpTools = tools.filter((t: ToolCatalogEntry) => t.source === "mcp") + expect(mcpTools.length).toBe(0) + }, + }) + }) + + test("MCP tools appear with source=mcp and correct server name when attached", async () => { + await using tmp = await tmpdir({ git: true }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const { tools } = await buildToolCatalogItems({ + getMcpTools: fakeMcpTools("aictrl", ["record_finding", "record_review_completed"]), + }) + + const mcpTools = tools.filter((t: ToolCatalogEntry) => t.source === "mcp") + expect(mcpTools.length).toBe(2) + + const names = mcpTools.map((t: ToolCatalogEntry) => t.name) + expect(names).toContain("aictrl_record_finding") + expect(names).toContain("aictrl_record_review_completed") + + // MCP tools carry the server name + for (const t of mcpTools) { + expect(t.server).toBe("aictrl") + } + }, + }) + }) + + test("consumer gate: record_finding absent when a different server is attached (tool exposed vs missing)", async () => { + await using tmp = await tmpdir({ git: true }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // Attach a server that does NOT have record_finding + const { tools } = await buildToolCatalogItems({ + getMcpTools: fakeMcpTools("other_server", ["some_other_tool"]), + }) + + const names = tools.map((t: ToolCatalogEntry) => t.name) + // record_finding is NOT in the list — consumer can structurally detect this + expect(names).not.toContain("aictrl_record_finding") + expect(names).not.toContain("aictrl_record_review_completed") + // other_server tool IS present + expect(names).toContain("other_server_some_other_tool") + }, + }) + }) + + test("multiple MCP servers: tools from all servers appear in tools[]", async () => { + await using tmp = await tmpdir({ git: true }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const getMcpTools = async () => [ + { toolKey: "aictrl_record_finding", serverName: "aictrl" }, + { toolKey: "other_another_tool", serverName: "other" }, + ] + + const { tools } = await buildToolCatalogItems({ getMcpTools }) + const mcpTools = tools.filter((t: ToolCatalogEntry) => t.source === "mcp") + expect(mcpTools.length).toBe(2) + + const aictrlTool = mcpTools.find((t: ToolCatalogEntry) => t.name === "aictrl_record_finding") + expect(aictrlTool?.server).toBe("aictrl") + + const otherTool = mcpTools.find((t: ToolCatalogEntry) => t.name === "other_another_tool") + expect(otherTool?.server).toBe("other") + }, + }) + }) + }) + + describe("buildToolCatalogItems — skills", () => { + test("skills[] is separate from tools[] and carries name + version", async () => { + await using tmp = await tmpdir({ git: true }) + await writeSkill(tmp.path, "code-review", "Perform code reviews", "1.4.0") + await writeSkill(tmp.path, "commit", "Create commits", "0.2.0") + + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const { tools, skills } = await buildToolCatalogItems({ getMcpTools: noMcpTools() }) + + // Skills are NOT in tools[] + const skillNamesInTools = tools.map((t: ToolCatalogEntry) => t.name).filter((n) => n === "code-review" || n === "commit") + expect(skillNamesInTools.length).toBe(0) + + // Skills are in skills[] + expect(skills.length).toBeGreaterThanOrEqual(2) + + const codeReview = skills.find((s: SkillCatalogEntry) => s.name === "code-review") + expect(codeReview).toBeDefined() + expect(codeReview!.version).toBe("1.4.0") + + const commit = skills.find((s: SkillCatalogEntry) => s.name === "commit") + expect(commit).toBeDefined() + expect(commit!.version).toBe("0.2.0") + }, + }) + }) + + test("skills with no version in frontmatter have version=null", async () => { + await using tmp = await tmpdir({ git: true }) + await writeSkill(tmp.path, "no-version-skill", "A skill without version") + + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const { skills } = await buildToolCatalogItems({ getMcpTools: noMcpTools() }) + + const s = skills.find((s: SkillCatalogEntry) => s.name === "no-version-skill") + expect(s).toBeDefined() + expect(s!.version).toBeNull() + }, + }) + }) + + test("skills[] does not include MCP tools or builtins", async () => { + await using tmp = await tmpdir({ git: true }) + await writeSkill(tmp.path, "my-skill", "A skill", "1.0.0") + + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const { tools, skills } = await buildToolCatalogItems({ + getMcpTools: fakeMcpTools("aictrl", ["record_finding"]), + }) + + // skills[] should only contain skills + expect(skills.every((s: SkillCatalogEntry) => typeof s.name === "string" && "version" in s)).toBe(true) + + // No source field on skill entries + for (const s of skills) { + expect((s as any).source).toBeUndefined() + } + }, + }) + }) + }) +}) From 035ea0f7acaab118be17fc8df78d63097180fbf3 Mon Sep 17 00:00:00 2001 From: Bulat Yapparov Date: Mon, 22 Jun 2026 07:57:38 +0100 Subject: [PATCH 2/4] refactor(skill): generalize version tracking into metadata-retention capability (#88) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of a one-off `version` field bolt-on, `Skill.Info` now carries a `metadata: Record` field that retains every SKILL.md frontmatter key beyond `name`/`description`, with values string-coerced (handles YAML numbers like `version: 1.0` → `"1.0"`). `version` becomes a derived convenience accessor (`metadata.version`) rather than a parallel extraction path. The dual parse path is gone — one coherent pass builds `metadata`, then derives `version` from it. Changes: - `Skill.Info`: add `metadata: z.record(z.string(), z.string()).default({})`; keep `version: z.string().optional()` as derived accessor - Loader: replace separate `"version" in md.data` extraction with a single `Object.fromEntries(…filter name/description…map String)` pass - Tests: add "non-version frontmatter keys survive in skill.metadata" test (license+author keys survive; name/description excluded; version still resolves via metadata.version) — written RED first, then GREEN `tool_catalog` event payload unchanged — still emits `version` sourced from `metadata.version`. `metadata` not added to event payload (kept lean per spec). Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_0187MsfK1upr6K2BKVbmaebQ --- packages/cli/src/skill/skill.ts | 30 ++++++++++++++---- packages/cli/test/tool-catalog.test.ts | 44 ++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/packages/cli/src/skill/skill.ts b/packages/cli/src/skill/skill.ts index 729e7d6..c2e17d0 100644 --- a/packages/cli/src/skill/skill.ts +++ b/packages/cli/src/skill/skill.ts @@ -21,7 +21,16 @@ export namespace Skill { description: z.string(), location: z.string(), content: z.string(), - /** Version from SKILL.md frontmatter `version` field. undefined when not declared. */ + /** + * All frontmatter keys beyond `name`/`description`, with values + * string-coerced. Retains arbitrary skill metadata (e.g. version, license, + * author) without requiring per-field changes to the schema. + */ + metadata: z.record(z.string(), z.string()).default({}), + /** + * Convenience accessor derived from `metadata.version`. + * undefined when the frontmatter has no `version` key. + */ version: z.string().optional(), }) export type Info = z.infer @@ -81,18 +90,25 @@ export namespace Skill { dirs.add(path.dirname(match)) - // Capture version from frontmatter if present (used by tool_catalog event) - const version = - typeof md.data === "object" && md.data !== null && "version" in md.data - ? String(md.data.version) - : undefined + // Build metadata from every frontmatter key except name/description, + // string-coercing values (handles YAML numbers like `version: 1.0` → "1.0"). + // version is derived from metadata.version as a convenience accessor. + const metadata: Record = + typeof md.data === "object" && md.data !== null + ? Object.fromEntries( + Object.entries(md.data) + .filter(([k]) => k !== "name" && k !== "description") + .map(([k, v]) => [k, String(v)]), + ) + : {} skills[parsed.data.name] = { name: parsed.data.name, description: parsed.data.description, location: match, content: md.content, - version, + metadata, + version: metadata.version, } } diff --git a/packages/cli/test/tool-catalog.test.ts b/packages/cli/test/tool-catalog.test.ts index 66e9fe0..ab892f9 100644 --- a/packages/cli/test/tool-catalog.test.ts +++ b/packages/cli/test/tool-catalog.test.ts @@ -214,6 +214,50 @@ describe("tool_catalog event helpers", () => { }) }) + test("non-version frontmatter keys survive in skill.metadata", async () => { + await using tmp = await tmpdir({ git: true }) + // SKILL.md with license, author, and version in frontmatter + const skillDir = path.join(tmp.path, ".claude", "skills", "licensed-skill") + await fs.mkdir(skillDir, { recursive: true }) + await fs.writeFile( + path.join(skillDir, "SKILL.md"), + [ + "---", + "name: licensed-skill", + "description: A skill with extra frontmatter", + "version: 2.1.0", + "license: MIT", + "author: aictrl-team", + "---", + "", + "Skill content here.", + ].join("\n"), + ) + + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const { Skill } = await import("../src/skill") + const skills = await Skill.all() + + const s = skills.find((x) => x.name === "licensed-skill") + expect(s).toBeDefined() + + // version still resolves via metadata.version + expect(s!.version).toBe("2.1.0") + + // non-version keys survive in metadata + expect(s!.metadata).toBeDefined() + expect(s!.metadata["license"]).toBe("MIT") + expect(s!.metadata["author"]).toBe("aictrl-team") + + // name and description are NOT duplicated in metadata + expect(s!.metadata["name"]).toBeUndefined() + expect(s!.metadata["description"]).toBeUndefined() + }, + }) + }) + test("skills[] does not include MCP tools or builtins", async () => { await using tmp = await tmpdir({ git: true }) await writeSkill(tmp.path, "my-skill", "A skill", "1.0.0") From 1a10f2306b923aa1da85bd03378c12d72c9c5cf8 Mon Sep 17 00:00:00 2001 From: Bulat Yapparov Date: Mon, 22 Jun 2026 09:05:29 +0100 Subject: [PATCH 3/4] fix(tool-catalog): address code review findings from PR #88 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - run.ts: guard buildToolCatalogItems on args.format==="json" so MCP listTools + skill scan are skipped for interactive sessions (#88 🟡) - run.ts: log catalog build failures in catch instead of silencing them so a missing tool_catalog event is diagnosable (#88 🟠) - mcp/index.ts: extract mcpToolKey() helper shared by tools() and toolEntries() — single source of truth for key derivation (#88 🟡) - EVENTS.md: document that builtin tools[] reflects the instance-level superset, not the per-model filtered dispatch set (#88 🟠) - test: regression tests for error propagation and key sanitization Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_0187MsfK1upr6K2BKVbmaebQ --- EVENTS.md | 2 ++ packages/cli/src/cli/cmd/run.ts | 24 ++++++++++---- packages/cli/src/mcp/index.ts | 23 +++++++++----- packages/cli/test/tool-catalog.test.ts | 44 ++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 15 deletions(-) diff --git a/EVENTS.md b/EVENTS.md index 5ed280c..5935d4e 100644 --- a/EVENTS.md +++ b/EVENTS.md @@ -50,6 +50,8 @@ Emitted once per session, immediately after `session_start` and before the first - **`version`** (string or null, **required**) — version string from the `version` field in `SKILL.md` frontmatter. `null` when no version is declared. > **Use case:** The server-side completion gate (aictrl-dev/aictrl #3216) uses `tools[]` to verify that `record_finding` and `record_review_completed` were actually in the model's function list at dispatch time, and `skills[]` to record which skill version produced the review. Detects the "silent success" failure mode where a missing MCP server lets a run complete green without ever having the review tools available. +> +> **Note on builtin tool filtering:** The `source: "builtin"` entries in `tools[]` reflect the _instance-level superset_ registered in `ToolRegistry`. Per-model filters (e.g. `apply_patch` only on gpt-*, `codesearch`/`websearch` only for aictrl provider) are applied at dispatch time inside `resolveTools` and are **not** reflected here. Consumers should treat the presence of a builtin tool name as "registered and potentially available", not as "guaranteed to appear in the model's function list". The strong structural guarantee (tool present ↔ tool in dispatch list) applies only to `source: "mcp"` entries. ### `session_start` diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts index 86f9d91..2e54c69 100644 --- a/packages/cli/src/cli/cmd/run.ts +++ b/packages/cli/src/cli/cmd/run.ts @@ -708,13 +708,23 @@ export const RunCommand = cmd({ // Emit the resolved tool catalog (builtin + MCP tools) and available // skills before the first model turn. Enables structural detection of // "tool was not exposed" failure modes (issue #85). - await buildToolCatalogItems() - .then(({ tools, skills }) => { - emit("tool_catalog", { tools, skills }) - }) - .catch(() => { - // Never let catalog collection block or crash the session. - }) + // Only build the catalog when output is JSON — emit() is a no-op + // otherwise, and the catalog construction (MCP listTools + skill scan) + // adds unnecessary latency for interactive sessions. + if (args.format === "json") { + await buildToolCatalogItems() + .then(({ tools, skills }) => { + emit("tool_catalog", { tools, skills }) + }) + .catch((err) => { + // Never let catalog collection block or crash the session, but + // surface the failure so a missing tool_catalog event is diagnosable. + console.error( + "failed to emit tool_catalog", + err instanceof Error ? err.message : String(err), + ) + }) + } const loopDone = loop() .then(() => { diff --git a/packages/cli/src/mcp/index.ts b/packages/cli/src/mcp/index.ts index e48afe0..e84863d 100644 --- a/packages/cli/src/mcp/index.ts +++ b/packages/cli/src/mcp/index.ts @@ -600,6 +600,17 @@ export namespace MCP { s.status[name] = { status: "disabled" } } + /** + * Single source of truth for the MCP tool key used to identify a tool + * both in `tools()` (dispatch) and `toolEntries()` (catalog). Both paths + * must produce byte-identical keys so catalog consumers get correct answers. + */ + function mcpToolKey(clientName: string, toolName: string): string { + const sanitizedClientName = clientName.replace(/[^a-zA-Z0-9_-]/g, "_") + const sanitizedToolName = toolName.replace(/[^a-zA-Z0-9_-]/g, "_") + return sanitizedClientName + "_" + sanitizedToolName + } + export async function tools() { const result: Record = {} const s = await state() @@ -634,9 +645,7 @@ export namespace MCP { const entry = isMcpConfigured(mcpConfig) ? mcpConfig : undefined const timeout = entry?.timeout ?? defaultTimeout for (const mcpTool of toolsResult.tools) { - const sanitizedClientName = clientName.replace(/[^a-zA-Z0-9_-]/g, "_") - const sanitizedToolName = mcpTool.name.replace(/[^a-zA-Z0-9_-]/g, "_") - result[sanitizedClientName + "_" + sanitizedToolName] = await convertMcpTool(mcpTool, client, timeout) + result[mcpToolKey(clientName, mcpTool.name)] = await convertMcpTool(mcpTool, client, timeout) } } return result @@ -647,8 +656,8 @@ export namespace MCP { * `tool_catalog` NDJSON event (issue #85). Each entry carries the tool key * (as exposed to the model) and the originating server name. * - * This is intentionally separate from `tools()` to avoid the overhead of - * building full AI-SDK tool objects just for catalog reporting. + * Uses `mcpToolKey()` — the same helper as `tools()` — so the key format is + * guaranteed identical to what `resolveTools` dispatches to the model. */ export async function toolEntries(): Promise<{ toolKey: string; serverName: string }[]> { const result: { toolKey: string; serverName: string }[] = [] @@ -671,11 +680,9 @@ export namespace MCP { for (const { clientName, toolsResult } of toolsResults) { if (!toolsResult) continue - const sanitizedClientName = clientName.replace(/[^a-zA-Z0-9_-]/g, "_") for (const mcpTool of toolsResult.tools) { - const sanitizedToolName = mcpTool.name.replace(/[^a-zA-Z0-9_-]/g, "_") result.push({ - toolKey: sanitizedClientName + "_" + sanitizedToolName, + toolKey: mcpToolKey(clientName, mcpTool.name), serverName: clientName, }) } diff --git a/packages/cli/test/tool-catalog.test.ts b/packages/cli/test/tool-catalog.test.ts index ab892f9..c31bb86 100644 --- a/packages/cli/test/tool-catalog.test.ts +++ b/packages/cli/test/tool-catalog.test.ts @@ -280,4 +280,48 @@ describe("tool_catalog event helpers", () => { }) }) }) + + describe("buildToolCatalogItems — error propagation", () => { + test("rejects when a dep throws, so the caller catch can log it (regression for silent-swallow bug)", async () => { + // If buildToolCatalogItems swallowed errors itself, this would resolve. + // It must reject so callers have a chance to log before discarding. + const throwingDep = async (): Promise<{ toolKey: string; serverName: string }[]> => { + throw new Error("simulated MCP failure") + } + + await using tmp = await tmpdir({ git: true }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + await expect( + buildToolCatalogItems({ getMcpTools: throwingDep }), + ).rejects.toThrow("simulated MCP failure") + }, + }) + }) + }) + + describe("MCP tool key format", () => { + test("special characters in server/tool names are sanitized to underscores", async () => { + await using tmp = await tmpdir({ git: true }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // Simulate a server name with special chars + const getMcpTools = async () => [ + { toolKey: "my_server_my_tool", serverName: "my.server" }, + { toolKey: "other__server_tool__name", serverName: "other/ server" }, + ] + + const { tools } = await buildToolCatalogItems({ getMcpTools }) + const mcpTools = tools.filter((t: ToolCatalogEntry) => t.source === "mcp") + + // Keys must match sanitized format: [a-zA-Z0-9_-] only + for (const t of mcpTools) { + expect(t.name).toMatch(/^[a-zA-Z0-9_-]+$/) + } + }, + }) + }) + }) }) From 8dae0c93471a5563f7f3c581c2023d8ef761dbb0 Mon Sep 17 00:00:00 2001 From: Bulat Yapparov Date: Mon, 22 Jun 2026 10:45:16 +0100 Subject: [PATCH 4/4] fix(tool-catalog): address round-2 review findings on PR #88 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - EVENTS.md: reorder tool_catalog section after session_start to match actual emission order (bot finding: section placed before session_start) - run.ts: wrap buildToolCatalogItems() with 10s timeout so a hanging MCP server cannot stall session start indefinitely; on failure emit a structured tool_catalog_error event to stdout instead of console.error to stderr, so JSON consumers can distinguish failure from empty catalog - skill.ts: add comment clarifying that metadata/version are type-only fields not validated by the Info.pick() Zod parse — both are populated via manual Object.fromEntries coercion after parse succeeds - test: add 3 regression tests for timeout, structured error path, and catalog rejection propagation Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_0187MsfK1upr6K2BKVbmaebQ --- EVENTS.md | 40 +++++++++++----------- packages/cli/src/cli/cmd/run.ts | 22 ++++++++---- packages/cli/src/skill/skill.ts | 8 +++++ packages/cli/test/tool-catalog.test.ts | 46 ++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 27 deletions(-) diff --git a/EVENTS.md b/EVENTS.md index 5935d4e..707ea9f 100644 --- a/EVENTS.md +++ b/EVENTS.md @@ -14,6 +14,26 @@ The schema is versioned via `session_start.schemaVersion`. This document describ ## Lifecycle Events +### `session_start` + +Emitted once when the session begins. + +```json +{ + "type": "session_start", + "schemaVersion": "1", + "model": "anthropic/claude-sonnet-4-20250514", + "agent": "default", + "permissions": [ + { "permission": "bash", "pattern": "*", "action": "allow" }, + { "permission": "write", "pattern": "*", "action": "ask" } + ] +} +``` + +- `schemaVersion` (string, **required**) — pinned contract version. Currently `"1"`. +- `permissions` (array, **required**) — the fully resolved permission ruleset for the session (merge of agent + session rules). In headless mode, any permission not matching an `allow` rule is auto-rejected. + ### `tool_catalog` Emitted once per session, immediately after `session_start` and before the first model turn (present even for zero-turn or immediately-failing runs). Lists every tool and skill that was resolved for the session so consumers can structurally verify tool exposure without string-matching the model's prose. @@ -53,26 +73,6 @@ Emitted once per session, immediately after `session_start` and before the first > > **Note on builtin tool filtering:** The `source: "builtin"` entries in `tools[]` reflect the _instance-level superset_ registered in `ToolRegistry`. Per-model filters (e.g. `apply_patch` only on gpt-*, `codesearch`/`websearch` only for aictrl provider) are applied at dispatch time inside `resolveTools` and are **not** reflected here. Consumers should treat the presence of a builtin tool name as "registered and potentially available", not as "guaranteed to appear in the model's function list". The strong structural guarantee (tool present ↔ tool in dispatch list) applies only to `source: "mcp"` entries. -### `session_start` - -Emitted once when the session begins. - -```json -{ - "type": "session_start", - "schemaVersion": "1", - "model": "anthropic/claude-sonnet-4-20250514", - "agent": "default", - "permissions": [ - { "permission": "bash", "pattern": "*", "action": "allow" }, - { "permission": "write", "pattern": "*", "action": "ask" } - ] -} -``` - -- `schemaVersion` (string, **required**) — pinned contract version. Currently `"1"`. -- `permissions` (array, **required**) — the fully resolved permission ruleset for the session (merge of agent + session rules). In headless mode, any permission not matching an `allow` rule is auto-rejected. - ### `session_complete` Emitted once when the session ends (success or failure). diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts index 2e54c69..fa9ec9b 100644 --- a/packages/cli/src/cli/cmd/run.ts +++ b/packages/cli/src/cli/cmd/run.ts @@ -5,6 +5,7 @@ import { UI } from "../ui" import { cmd } from "./cmd" import { classifySessionError, SCHEMA_VERSION } from "./run.errors" import { buildToolCatalogItems } from "./tool-catalog" +import { withTimeout } from "../../util/timeout" import { Flag } from "../../flag/flag" import { bootstrap } from "../bootstrap" import { EOL } from "os" @@ -711,18 +712,25 @@ export const RunCommand = cmd({ // Only build the catalog when output is JSON — emit() is a no-op // otherwise, and the catalog construction (MCP listTools + skill scan) // adds unnecessary latency for interactive sessions. + // + // Bounded by TOOL_CATALOG_TIMEOUT_MS so a hanging MCP server cannot + // indefinitely stall session start. On timeout or error the failure is + // emitted as a structured stdout event (tool_catalog_error) so JSON + // consumers can distinguish "catalog failed" from "catalog succeeded with + // no tools" — stderr is invisible to consumers reading the NDJSON stream. + const TOOL_CATALOG_TIMEOUT_MS = 10_000 if (args.format === "json") { - await buildToolCatalogItems() + await withTimeout(buildToolCatalogItems(), TOOL_CATALOG_TIMEOUT_MS) .then(({ tools, skills }) => { emit("tool_catalog", { tools, skills }) }) .catch((err) => { - // Never let catalog collection block or crash the session, but - // surface the failure so a missing tool_catalog event is diagnosable. - console.error( - "failed to emit tool_catalog", - err instanceof Error ? err.message : String(err), - ) + // Never let catalog collection block or crash the session. + // Emit a structured event so consumers reading stdout can detect + // the failure rather than silently missing the tool_catalog event. + emit("tool_catalog_error", { + error: err instanceof Error ? err.message : String(err), + }) }) } diff --git a/packages/cli/src/skill/skill.ts b/packages/cli/src/skill/skill.ts index c2e17d0..0084e59 100644 --- a/packages/cli/src/skill/skill.ts +++ b/packages/cli/src/skill/skill.ts @@ -25,11 +25,19 @@ export namespace Skill { * All frontmatter keys beyond `name`/`description`, with values * string-coerced. Retains arbitrary skill metadata (e.g. version, license, * author) without requiring per-field changes to the schema. + * + * NOTE: `metadata` and `version` below are TYPE-ONLY fields — they are NOT + * validated by the `Info.pick({ name, description })` Zod parse in `addSkill`. + * Both are populated manually from raw frontmatter after the parse succeeds + * (see the `Object.fromEntries` block below). The schema fields exist solely + * to give `Skill.Info` a correct TypeScript type; runtime coercion is + * intentional to handle e.g. `version: 1.0` (YAML number) → `"1.0"` (string). */ metadata: z.record(z.string(), z.string()).default({}), /** * Convenience accessor derived from `metadata.version`. * undefined when the frontmatter has no `version` key. + * TYPE-ONLY — see note on `metadata` above. */ version: z.string().optional(), }) diff --git a/packages/cli/test/tool-catalog.test.ts b/packages/cli/test/tool-catalog.test.ts index c31bb86..8b4df1e 100644 --- a/packages/cli/test/tool-catalog.test.ts +++ b/packages/cli/test/tool-catalog.test.ts @@ -299,6 +299,52 @@ describe("tool_catalog event helpers", () => { }, }) }) + + test("withTimeout wrapping surfaces a timed-out catalog as a rejection (regression: hanging MCP server must not stall session start)", async () => { + // This test verifies that when buildToolCatalogItems is wrapped with + // withTimeout (as done in run.ts), a never-resolving dep causes the + // outer promise to reject — NOT to hang indefinitely. The session-start + // guard in run.ts catches this and emits tool_catalog_error to stdout. + const { withTimeout } = await import("../src/util/timeout") + + const neverResolvingDep = (): Promise<{ toolKey: string; serverName: string }[]> => + new Promise(() => { + // intentionally never resolves or rejects — simulates a hanging MCP client + }) + + await using tmp = await tmpdir({ git: true }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + await expect( + withTimeout(buildToolCatalogItems({ getMcpTools: neverResolvingDep }), 50), + ).rejects.toThrow(/timed out/i) + }, + }) + }) + + test("structured error path: catalog failure produces diagnosable error message (regression: console.error to stdout event)", async () => { + // Verifies that the rejection from buildToolCatalogItems carries enough + // information for the caller to construct a structured tool_catalog_error event. + // The key requirement: error.message must be a non-empty string. + const throwingDep = async (): Promise<{ toolKey: string; serverName: string }[]> => { + throw new Error("MCP server connection refused") + } + + await using tmp = await tmpdir({ git: true }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + let capturedMessage: string | undefined + await buildToolCatalogItems({ getMcpTools: throwingDep }).catch((err) => { + capturedMessage = err instanceof Error ? err.message : String(err) + }) + expect(capturedMessage).toBeDefined() + expect(typeof capturedMessage).toBe("string") + expect(capturedMessage!.length).toBeGreaterThan(0) + }, + }) + }) }) describe("MCP tool key format", () => {