From ddd5943cc5b19b71b58916ff4688a83cfc83bb70 Mon Sep 17 00:00:00 2001
From: Bulat Yapparov <by4pparov@yandex.ru>
Date: Mon, 22 Jun 2026 04:48:25 +0100
Subject: [PATCH 1/4] feat(run): emit tool_catalog NDJSON event at session
 start (#85)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Emits a single `tool_catalog` event immediately after `session_start` and
before the first model turn so the server-side completion gate can structurally
verify that required MCP tools (e.g. `record_finding`) were actually exposed to
the model — without string-matching prose output.

Changes:
- `src/cli/cmd/tool-catalog.ts` (new): `buildToolCatalogItems()` collects
  builtin tool IDs via `ToolRegistry.ids()` and MCP entries via the new
  `MCP.toolEntries()`, returning `{ tools, skills }` with injectable deps for
  unit testing.
- `src/mcp/index.ts`: adds `MCP.toolEntries()` — lightweight sibling to
  `tools()` that returns `{ toolKey, serverName }[]` without building full
  AI-SDK tool objects; used by `buildToolCatalogItems()`.
- `src/skill/skill.ts`: extends `Skill.Info` with optional `version` field
  threaded from SKILL.md frontmatter; `null` when not declared.
- `src/cli/cmd/run.ts`: `await buildToolCatalogItems()` + `emit("tool_catalog",
  ...)` inserted between `session_start` and the first `loop()` call.
- `test/tool-catalog.test.ts` (new, TDD): 9 tests covering builtin/mcp
  tool shape, the "tool exposed vs missing" consumer gate, skill version
  presence/absence, and skills/tools separation.
- `EVENTS.md`: documents the new event schema including the version gap note.

Skill version gap: `Skill.Info.version` is now threaded through from
frontmatter. Skills without a `version` field in their SKILL.md emit
`version: null`.

Closes #85

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_0187MsfK1upr6K2BKVbmaebQ
---
 EVENTS.md                                |  37 ++++
 packages/cli/src/cli/cmd/run.ts          |  12 ++
 packages/cli/src/cli/cmd/tool-catalog.ts | 113 +++++++++++
 packages/cli/src/mcp/index.ts            |  42 ++++
 packages/cli/src/skill/skill.ts          |   9 +
 packages/cli/test/tool-catalog.test.ts   | 239 +++++++++++++++++++++++
 6 files changed, 452 insertions(+)
 create mode 100644 packages/cli/src/cli/cmd/tool-catalog.ts
 create mode 100644 packages/cli/test/tool-catalog.test.ts

diff --git a/EVENTS.md b/EVENTS.md
index 45a6ff3..5ed280c 100644
--- a/EVENTS.md
+++ b/EVENTS.md
@@ -14,6 +14,43 @@ The schema is versioned via `session_start.schemaVersion`. This document describ
 
 ## Lifecycle Events
 
+### `tool_catalog`
+
+Emitted once per session, immediately after `session_start` and before the first model turn (present even for zero-turn or immediately-failing runs). Lists every tool and skill that was resolved for the session so consumers can structurally verify tool exposure without string-matching the model's prose.
+
+```json
+{
+  "type": "tool_catalog",
+  "sessionID": "ses_…",
+  "timestamp": 1741500000000,
+  "tools": [
+    { "name": "record_finding", "source": "mcp", "server": "aictrl" },
+    { "name": "record_review_completed", "source": "mcp", "server": "aictrl" },
+    { "name": "bash", "source": "builtin" },
+    { "name": "read", "source": "builtin" }
+  ],
+  "skills": [
+    { "name": "code-review", "version": "1.4.0" },
+    { "name": "fullstack-code-review", "version": null }
+  ]
+}
+```
+
+#### `tools[]` — builtin and MCP tools resolved for this session
+
+- **`name`** (string, **required**) — the tool/function name as exposed to the model. Mirrors upstream `ToolListItem.id`. For MCP tools this is `{serverName}_{toolName}` (both sanitised to `[a-zA-Z0-9_-]`).
+- **`source`** (string, **required**) — `"builtin"` for tools built into the CLI; `"mcp"` for tools provided by a connected MCP server.
+- **`server`** (string, optional) — the MCP server (client name from config) that provides this tool. Present only when `source` is `"mcp"`.
+
+`description` and `parameters` are intentionally omitted to keep the event lean. The primary consumer only needs `name` + `source` to verify tool exposure.
+
+#### `skills[]` — skill packs resolved for this session (separate from `tools[]`)
+
+- **`name`** (string, **required**) — skill name from `SKILL.md` frontmatter.
+- **`version`** (string or null, **required**) — version string from the `version` field in `SKILL.md` frontmatter. `null` when no version is declared.
+
+> **Use case:** The server-side completion gate (aictrl-dev/aictrl #3216) uses `tools[]` to verify that `record_finding` and `record_review_completed` were actually in the model's function list at dispatch time, and `skills[]` to record which skill version produced the review. Detects the "silent success" failure mode where a missing MCP server lets a run complete green without ever having the review tools available.
+
 ### `session_start`
 
 Emitted once when the session begins.
diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts
index a4cfb2b..86f9d91 100644
--- a/packages/cli/src/cli/cmd/run.ts
+++ b/packages/cli/src/cli/cmd/run.ts
@@ -4,6 +4,7 @@ import { pathToFileURL } from "bun"
 import { UI } from "../ui"
 import { cmd } from "./cmd"
 import { classifySessionError, SCHEMA_VERSION } from "./run.errors"
+import { buildToolCatalogItems } from "./tool-catalog"
 import { Flag } from "../../flag/flag"
 import { bootstrap } from "../bootstrap"
 import { EOL } from "os"
@@ -704,6 +705,17 @@ export const RunCommand = cmd({
         permissions: PermissionNext.merge(agentInfo.permission, rules),
       })
 
+      // Emit the resolved tool catalog (builtin + MCP tools) and available
+      // skills before the first model turn. Enables structural detection of
+      // "tool was not exposed" failure modes (issue #85).
+      await buildToolCatalogItems()
+        .then(({ tools, skills }) => {
+          emit("tool_catalog", { tools, skills })
+        })
+        .catch(() => {
+          // Never let catalog collection block or crash the session.
+        })
+
       const loopDone = loop()
         .then(() => {
           emit("session_complete", {
diff --git a/packages/cli/src/cli/cmd/tool-catalog.ts b/packages/cli/src/cli/cmd/tool-catalog.ts
new file mode 100644
index 0000000..be4ba17
--- /dev/null
+++ b/packages/cli/src/cli/cmd/tool-catalog.ts
@@ -0,0 +1,113 @@
+/**
+ * tool-catalog.ts — builds the resolved tool/skill inventory emitted in the
+ * `tool_catalog` NDJSON event (issue #85).
+ *
+ * Exported as a standalone module so it can be unit-tested independently of
+ * the full run.ts wiring.
+ */
+import { ToolRegistry } from "../../tool/registry"
+import { MCP } from "../../mcp"
+import { Skill } from "../../skill"
+
+/** A single entry in the `tools[]` array of the `tool_catalog` event. */
+export interface ToolCatalogEntry {
+  /** Tool name as exposed to the model. Mirrors upstream ToolListItem.id. */
+  name: string
+  /** Whether the tool is a builtin (local) or an MCP tool. */
+  source: "builtin" | "mcp"
+  /**
+   * The MCP server name for source=mcp tools (sanitized client name).
+   * Omitted for builtin tools.
+   */
+  server?: string
+}
+
+/** A single entry in the `skills[]` array of the `tool_catalog` event. */
+export interface SkillCatalogEntry {
+  /** Skill name from SKILL.md frontmatter. */
+  name: string
+  /**
+   * Skill version from SKILL.md frontmatter `version` field.
+   * null when no version is declared in frontmatter.
+   */
+  version: string | null
+}
+
+/** Result of buildToolCatalogItems(). */
+export interface ToolCatalogItems {
+  tools: ToolCatalogEntry[]
+  skills: SkillCatalogEntry[]
+}
+
+/**
+ * Dependency injection shape, used for testing to avoid touching real MCP
+ * state and the real filesystem.
+ */
+export interface ToolCatalogDeps {
+  /**
+   * Returns builtin tool IDs (the resolved set for the current instance).
+   * Defaults to ToolRegistry.ids().
+   */
+  getBuiltinIds?: () => Promise<string[]>
+
+  /**
+   * Returns connected MCP tools as { toolKey, serverName } pairs.
+   * toolKey = the combined `serverName_toolName` key given to the model.
+   * Defaults to an internal implementation using MCP.clients() + listTools().
+   */
+  getMcpTools?: () => Promise<{ toolKey: string; serverName: string }[]>
+
+  /**
+   * Returns the list of resolved skills.
+   * Defaults to Skill.all().
+   */
+  getSkills?: () => Promise<Skill.Info[]>
+}
+
+/**
+ * Collects the resolved tool catalog and skill list for the current session.
+ *
+ * Uses ToolRegistry.ids() for builtins (the instance-level superset, without
+ * model-specific filtering which only applies per-turn inside resolveTools).
+ * Uses MCP connected clients for MCP tools.
+ * Uses Skill.all() for skills with optional version from frontmatter.
+ */
+export async function buildToolCatalogItems(deps: ToolCatalogDeps = {}): Promise<ToolCatalogItems> {
+  const getBuiltinIds = deps.getBuiltinIds ?? (() => ToolRegistry.ids())
+  const getMcpTools = deps.getMcpTools ?? defaultGetMcpTools
+  const getSkills = deps.getSkills ?? (() => Skill.all())
+
+  const [builtinIds, mcpTools, skills] = await Promise.all([getBuiltinIds(), getMcpTools(), getSkills()])
+
+  const tools: ToolCatalogEntry[] = [
+    ...builtinIds.map(
+      (name): ToolCatalogEntry => ({
+        name,
+        source: "builtin",
+      }),
+    ),
+    ...mcpTools.map(
+      ({ toolKey, serverName }): ToolCatalogEntry => ({
+        name: toolKey,
+        source: "mcp",
+        server: serverName,
+      }),
+    ),
+  ]
+
+  const skillEntries: SkillCatalogEntry[] = skills.map((s) => ({
+    name: s.name,
+    version: s.version ?? null,
+  }))
+
+  return { tools, skills: skillEntries }
+}
+
+/**
+ * Default MCP tool resolver: delegates to MCP.toolEntries() which queries
+ * each connected client's tool list and returns { toolKey, serverName } pairs
+ * (matching the key format used in SessionPrompt.resolveTools).
+ */
+async function defaultGetMcpTools(): Promise<{ toolKey: string; serverName: string }[]> {
+  return MCP.toolEntries()
+}
diff --git a/packages/cli/src/mcp/index.ts b/packages/cli/src/mcp/index.ts
index fcc83d7..e48afe0 100644
--- a/packages/cli/src/mcp/index.ts
+++ b/packages/cli/src/mcp/index.ts
@@ -642,6 +642,48 @@ export namespace MCP {
     return result
   }
 
+  /**
+   * Returns the resolved MCP tool list as lightweight entries for the
+   * `tool_catalog` NDJSON event (issue #85). Each entry carries the tool key
+   * (as exposed to the model) and the originating server name.
+   *
+   * This is intentionally separate from `tools()` to avoid the overhead of
+   * building full AI-SDK tool objects just for catalog reporting.
+   */
+  export async function toolEntries(): Promise<{ toolKey: string; serverName: string }[]> {
+    const result: { toolKey: string; serverName: string }[] = []
+    const s = await state()
+    const clientsSnapshot = await clients()
+
+    const connectedClients = Object.entries(clientsSnapshot).filter(
+      ([clientName]) => s.status[clientName]?.status === "connected",
+    )
+
+    const toolsResults = await Promise.all(
+      connectedClients.map(async ([clientName, client]) => {
+        const toolsResult = await client.listTools().catch((e) => {
+          log.error("failed to get tool entries", { clientName, error: e.message })
+          return undefined
+        })
+        return { clientName, toolsResult }
+      }),
+    )
+
+    for (const { clientName, toolsResult } of toolsResults) {
+      if (!toolsResult) continue
+      const sanitizedClientName = clientName.replace(/[^a-zA-Z0-9_-]/g, "_")
+      for (const mcpTool of toolsResult.tools) {
+        const sanitizedToolName = mcpTool.name.replace(/[^a-zA-Z0-9_-]/g, "_")
+        result.push({
+          toolKey: sanitizedClientName + "_" + sanitizedToolName,
+          serverName: clientName,
+        })
+      }
+    }
+
+    return result
+  }
+
   export async function prompts() {
     const s = await state()
     const clientsSnapshot = await clients()
diff --git a/packages/cli/src/skill/skill.ts b/packages/cli/src/skill/skill.ts
index 14b5ef5..729e7d6 100644
--- a/packages/cli/src/skill/skill.ts
+++ b/packages/cli/src/skill/skill.ts
@@ -21,6 +21,8 @@ export namespace Skill {
     description: z.string(),
     location: z.string(),
     content: z.string(),
+    /** Version from SKILL.md frontmatter `version` field. undefined when not declared. */
+    version: z.string().optional(),
   })
   export type Info = z.infer<typeof Info>
 
@@ -79,11 +81,18 @@ export namespace Skill {
 
       dirs.add(path.dirname(match))
 
+      // Capture version from frontmatter if present (used by tool_catalog event)
+      const version =
+        typeof md.data === "object" && md.data !== null && "version" in md.data
+          ? String(md.data.version)
+          : undefined
+
       skills[parsed.data.name] = {
         name: parsed.data.name,
         description: parsed.data.description,
         location: match,
         content: md.content,
+        version,
       }
     }
 
diff --git a/packages/cli/test/tool-catalog.test.ts b/packages/cli/test/tool-catalog.test.ts
new file mode 100644
index 0000000..66e9fe0
--- /dev/null
+++ b/packages/cli/test/tool-catalog.test.ts
@@ -0,0 +1,239 @@
+/**
+ * TDD tests for the `tool_catalog` NDJSON event (issue #85).
+ *
+ * Tests verify:
+ * 1. buildToolCatalogItems() returns builtin tools (source: builtin)
+ * 2. MCP tools appear when an MCP server is attached (source: mcp, correct server)
+ * 3. MCP tools are absent when no MCP server is attached
+ * 4. Skills appear in the separate skills[] array with name + version
+ * 5. Skills have version: null when no version is in frontmatter
+ */
+import { describe, expect, test } from "bun:test"
+import { Instance } from "../src/project/instance"
+import { tmpdir } from "./fixture/fixture"
+import { buildToolCatalogItems } from "../src/cli/cmd/tool-catalog"
+import type { SkillCatalogEntry, ToolCatalogEntry } from "../src/cli/cmd/tool-catalog"
+import path from "path"
+import fs from "fs/promises"
+
+// Helper to create a minimal SKILL.md with optional version in frontmatter
+async function writeSkill(
+  dir: string,
+  name: string,
+  description: string,
+  version?: string,
+): Promise<void> {
+  const versionLine = version ? `version: ${version}\n` : ""
+  const frontmatter = `---\nname: ${name}\ndescription: ${description}\n${versionLine}---`
+  const skillDir = path.join(dir, ".claude", "skills", name)
+  await fs.mkdir(skillDir, { recursive: true })
+  await fs.writeFile(path.join(skillDir, "SKILL.md"), `${frontmatter}\n\nSkill content here.\n`)
+}
+
+// Fake MCP tool provider — simulates a connected MCP server with given tools
+function fakeMcpTools(serverName: string, toolNames: string[]): () => Promise<{ toolKey: string; serverName: string }[]> {
+  return async () =>
+    toolNames.map((n) => ({
+      toolKey: `${serverName}_${n}`,
+      serverName,
+    }))
+}
+
+// Fake MCP provider returning no tools
+function noMcpTools(): () => Promise<{ toolKey: string; serverName: string }[]> {
+  return async () => []
+}
+
+describe("tool_catalog event helpers", () => {
+  describe("buildToolCatalogItems — builtin tools", () => {
+    test("builtin tools have source=builtin and a name", async () => {
+      await using tmp = await tmpdir({ git: true })
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const { tools } = await buildToolCatalogItems({ getMcpTools: noMcpTools() })
+
+          expect(tools.length).toBeGreaterThan(0)
+
+          const builtins = tools.filter((t: ToolCatalogEntry) => t.source === "builtin")
+          expect(builtins.length).toBeGreaterThan(0)
+
+          // All builtin tools must have a name
+          for (const t of builtins) {
+            expect(typeof t.name).toBe("string")
+            expect(t.name.length).toBeGreaterThan(0)
+          }
+
+          // Known builtin tools that are always present
+          const names = builtins.map((t: ToolCatalogEntry) => t.name)
+          expect(names).toContain("bash")
+          expect(names).toContain("read")
+          expect(names).toContain("edit")
+        },
+      })
+    })
+
+    test("builtin tools do NOT have a server field", async () => {
+      await using tmp = await tmpdir({ git: true })
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const { tools } = await buildToolCatalogItems({ getMcpTools: noMcpTools() })
+          const builtins = tools.filter((t: ToolCatalogEntry) => t.source === "builtin")
+          for (const t of builtins) {
+            expect(t.server).toBeUndefined()
+          }
+        },
+      })
+    })
+  })
+
+  describe("buildToolCatalogItems — MCP tools", () => {
+    test("MCP tools are absent when no MCP server is configured", async () => {
+      await using tmp = await tmpdir({ git: true })
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const { tools } = await buildToolCatalogItems({ getMcpTools: noMcpTools() })
+          const mcpTools = tools.filter((t: ToolCatalogEntry) => t.source === "mcp")
+          expect(mcpTools.length).toBe(0)
+        },
+      })
+    })
+
+    test("MCP tools appear with source=mcp and correct server name when attached", async () => {
+      await using tmp = await tmpdir({ git: true })
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const { tools } = await buildToolCatalogItems({
+            getMcpTools: fakeMcpTools("aictrl", ["record_finding", "record_review_completed"]),
+          })
+
+          const mcpTools = tools.filter((t: ToolCatalogEntry) => t.source === "mcp")
+          expect(mcpTools.length).toBe(2)
+
+          const names = mcpTools.map((t: ToolCatalogEntry) => t.name)
+          expect(names).toContain("aictrl_record_finding")
+          expect(names).toContain("aictrl_record_review_completed")
+
+          // MCP tools carry the server name
+          for (const t of mcpTools) {
+            expect(t.server).toBe("aictrl")
+          }
+        },
+      })
+    })
+
+    test("consumer gate: record_finding absent when a different server is attached (tool exposed vs missing)", async () => {
+      await using tmp = await tmpdir({ git: true })
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          // Attach a server that does NOT have record_finding
+          const { tools } = await buildToolCatalogItems({
+            getMcpTools: fakeMcpTools("other_server", ["some_other_tool"]),
+          })
+
+          const names = tools.map((t: ToolCatalogEntry) => t.name)
+          // record_finding is NOT in the list — consumer can structurally detect this
+          expect(names).not.toContain("aictrl_record_finding")
+          expect(names).not.toContain("aictrl_record_review_completed")
+          // other_server tool IS present
+          expect(names).toContain("other_server_some_other_tool")
+        },
+      })
+    })
+
+    test("multiple MCP servers: tools from all servers appear in tools[]", async () => {
+      await using tmp = await tmpdir({ git: true })
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const getMcpTools = async () => [
+            { toolKey: "aictrl_record_finding", serverName: "aictrl" },
+            { toolKey: "other_another_tool", serverName: "other" },
+          ]
+
+          const { tools } = await buildToolCatalogItems({ getMcpTools })
+          const mcpTools = tools.filter((t: ToolCatalogEntry) => t.source === "mcp")
+          expect(mcpTools.length).toBe(2)
+
+          const aictrlTool = mcpTools.find((t: ToolCatalogEntry) => t.name === "aictrl_record_finding")
+          expect(aictrlTool?.server).toBe("aictrl")
+
+          const otherTool = mcpTools.find((t: ToolCatalogEntry) => t.name === "other_another_tool")
+          expect(otherTool?.server).toBe("other")
+        },
+      })
+    })
+  })
+
+  describe("buildToolCatalogItems — skills", () => {
+    test("skills[] is separate from tools[] and carries name + version", async () => {
+      await using tmp = await tmpdir({ git: true })
+      await writeSkill(tmp.path, "code-review", "Perform code reviews", "1.4.0")
+      await writeSkill(tmp.path, "commit", "Create commits", "0.2.0")
+
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const { tools, skills } = await buildToolCatalogItems({ getMcpTools: noMcpTools() })
+
+          // Skills are NOT in tools[]
+          const skillNamesInTools = tools.map((t: ToolCatalogEntry) => t.name).filter((n) => n === "code-review" || n === "commit")
+          expect(skillNamesInTools.length).toBe(0)
+
+          // Skills are in skills[]
+          expect(skills.length).toBeGreaterThanOrEqual(2)
+
+          const codeReview = skills.find((s: SkillCatalogEntry) => s.name === "code-review")
+          expect(codeReview).toBeDefined()
+          expect(codeReview!.version).toBe("1.4.0")
+
+          const commit = skills.find((s: SkillCatalogEntry) => s.name === "commit")
+          expect(commit).toBeDefined()
+          expect(commit!.version).toBe("0.2.0")
+        },
+      })
+    })
+
+    test("skills with no version in frontmatter have version=null", async () => {
+      await using tmp = await tmpdir({ git: true })
+      await writeSkill(tmp.path, "no-version-skill", "A skill without version")
+
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const { skills } = await buildToolCatalogItems({ getMcpTools: noMcpTools() })
+
+          const s = skills.find((s: SkillCatalogEntry) => s.name === "no-version-skill")
+          expect(s).toBeDefined()
+          expect(s!.version).toBeNull()
+        },
+      })
+    })
+
+    test("skills[] does not include MCP tools or builtins", async () => {
+      await using tmp = await tmpdir({ git: true })
+      await writeSkill(tmp.path, "my-skill", "A skill", "1.0.0")
+
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const { tools, skills } = await buildToolCatalogItems({
+            getMcpTools: fakeMcpTools("aictrl", ["record_finding"]),
+          })
+
+          // skills[] should only contain skills
+          expect(skills.every((s: SkillCatalogEntry) => typeof s.name === "string" && "version" in s)).toBe(true)
+
+          // No source field on skill entries
+          for (const s of skills) {
+            expect((s as any).source).toBeUndefined()
+          }
+        },
+      })
+    })
+  })
+})

From 035ea0f7acaab118be17fc8df78d63097180fbf3 Mon Sep 17 00:00:00 2001
From: Bulat Yapparov <by4pparov@yandex.ru>
Date: Mon, 22 Jun 2026 07:57:38 +0100
Subject: [PATCH 2/4] refactor(skill): generalize version tracking into
 metadata-retention capability (#88)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of a one-off `version` field bolt-on, `Skill.Info` now carries a
`metadata: Record<string, string>` field that retains every SKILL.md
frontmatter key beyond `name`/`description`, with values string-coerced
(handles YAML numbers like `version: 1.0` → `"1.0"`).

`version` becomes a derived convenience accessor (`metadata.version`) rather
than a parallel extraction path. The dual parse path is gone — one coherent
pass builds `metadata`, then derives `version` from it.

Changes:
- `Skill.Info`: add `metadata: z.record(z.string(), z.string()).default({})`;
  keep `version: z.string().optional()` as derived accessor
- Loader: replace separate `"version" in md.data` extraction with a single
  `Object.fromEntries(…filter name/description…map String)` pass
- Tests: add "non-version frontmatter keys survive in skill.metadata" test
  (license+author keys survive; name/description excluded; version still
  resolves via metadata.version) — written RED first, then GREEN

`tool_catalog` event payload unchanged — still emits `version` sourced from
`metadata.version`. `metadata` not added to event payload (kept lean per spec).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_0187MsfK1upr6K2BKVbmaebQ
---
 packages/cli/src/skill/skill.ts        | 30 ++++++++++++++----
 packages/cli/test/tool-catalog.test.ts | 44 ++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/packages/cli/src/skill/skill.ts b/packages/cli/src/skill/skill.ts
index 729e7d6..c2e17d0 100644
--- a/packages/cli/src/skill/skill.ts
+++ b/packages/cli/src/skill/skill.ts
@@ -21,7 +21,16 @@ export namespace Skill {
     description: z.string(),
     location: z.string(),
     content: z.string(),
-    /** Version from SKILL.md frontmatter `version` field. undefined when not declared. */
+    /**
+     * All frontmatter keys beyond `name`/`description`, with values
+     * string-coerced. Retains arbitrary skill metadata (e.g. version, license,
+     * author) without requiring per-field changes to the schema.
+     */
+    metadata: z.record(z.string(), z.string()).default({}),
+    /**
+     * Convenience accessor derived from `metadata.version`.
+     * undefined when the frontmatter has no `version` key.
+     */
     version: z.string().optional(),
   })
   export type Info = z.infer<typeof Info>
@@ -81,18 +90,25 @@ export namespace Skill {
 
       dirs.add(path.dirname(match))
 
-      // Capture version from frontmatter if present (used by tool_catalog event)
-      const version =
-        typeof md.data === "object" && md.data !== null && "version" in md.data
-          ? String(md.data.version)
-          : undefined
+      // Build metadata from every frontmatter key except name/description,
+      // string-coercing values (handles YAML numbers like `version: 1.0` → "1.0").
+      // version is derived from metadata.version as a convenience accessor.
+      const metadata: Record<string, string> =
+        typeof md.data === "object" && md.data !== null
+          ? Object.fromEntries(
+              Object.entries(md.data)
+                .filter(([k]) => k !== "name" && k !== "description")
+                .map(([k, v]) => [k, String(v)]),
+            )
+          : {}
 
       skills[parsed.data.name] = {
         name: parsed.data.name,
         description: parsed.data.description,
         location: match,
         content: md.content,
-        version,
+        metadata,
+        version: metadata.version,
       }
     }
 
diff --git a/packages/cli/test/tool-catalog.test.ts b/packages/cli/test/tool-catalog.test.ts
index 66e9fe0..ab892f9 100644
--- a/packages/cli/test/tool-catalog.test.ts
+++ b/packages/cli/test/tool-catalog.test.ts
@@ -214,6 +214,50 @@ describe("tool_catalog event helpers", () => {
       })
     })
 
+    test("non-version frontmatter keys survive in skill.metadata", async () => {
+      await using tmp = await tmpdir({ git: true })
+      // SKILL.md with license, author, and version in frontmatter
+      const skillDir = path.join(tmp.path, ".claude", "skills", "licensed-skill")
+      await fs.mkdir(skillDir, { recursive: true })
+      await fs.writeFile(
+        path.join(skillDir, "SKILL.md"),
+        [
+          "---",
+          "name: licensed-skill",
+          "description: A skill with extra frontmatter",
+          "version: 2.1.0",
+          "license: MIT",
+          "author: aictrl-team",
+          "---",
+          "",
+          "Skill content here.",
+        ].join("\n"),
+      )
+
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const { Skill } = await import("../src/skill")
+          const skills = await Skill.all()
+
+          const s = skills.find((x) => x.name === "licensed-skill")
+          expect(s).toBeDefined()
+
+          // version still resolves via metadata.version
+          expect(s!.version).toBe("2.1.0")
+
+          // non-version keys survive in metadata
+          expect(s!.metadata).toBeDefined()
+          expect(s!.metadata["license"]).toBe("MIT")
+          expect(s!.metadata["author"]).toBe("aictrl-team")
+
+          // name and description are NOT duplicated in metadata
+          expect(s!.metadata["name"]).toBeUndefined()
+          expect(s!.metadata["description"]).toBeUndefined()
+        },
+      })
+    })
+
     test("skills[] does not include MCP tools or builtins", async () => {
       await using tmp = await tmpdir({ git: true })
       await writeSkill(tmp.path, "my-skill", "A skill", "1.0.0")

From 1a10f2306b923aa1da85bd03378c12d72c9c5cf8 Mon Sep 17 00:00:00 2001
From: Bulat Yapparov <by4pparov@yandex.ru>
Date: Mon, 22 Jun 2026 09:05:29 +0100
Subject: [PATCH 3/4] fix(tool-catalog): address code review findings from PR
 #88
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- run.ts: guard buildToolCatalogItems on args.format==="json" so MCP
  listTools + skill scan are skipped for interactive sessions (#88 🟡)
- run.ts: log catalog build failures in catch instead of silencing them
  so a missing tool_catalog event is diagnosable (#88 🟠)
- mcp/index.ts: extract mcpToolKey() helper shared by tools() and
  toolEntries() — single source of truth for key derivation (#88 🟡)
- EVENTS.md: document that builtin tools[] reflects the instance-level
  superset, not the per-model filtered dispatch set (#88 🟠)
- test: regression tests for error propagation and key sanitization

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_0187MsfK1upr6K2BKVbmaebQ
---
 EVENTS.md                              |  2 ++
 packages/cli/src/cli/cmd/run.ts        | 24 ++++++++++----
 packages/cli/src/mcp/index.ts          | 23 +++++++++-----
 packages/cli/test/tool-catalog.test.ts | 44 ++++++++++++++++++++++++++
 4 files changed, 78 insertions(+), 15 deletions(-)

diff --git a/EVENTS.md b/EVENTS.md
index 5ed280c..5935d4e 100644
--- a/EVENTS.md
+++ b/EVENTS.md
@@ -50,6 +50,8 @@ Emitted once per session, immediately after `session_start` and before the first
 - **`version`** (string or null, **required**) — version string from the `version` field in `SKILL.md` frontmatter. `null` when no version is declared.
 
 > **Use case:** The server-side completion gate (aictrl-dev/aictrl #3216) uses `tools[]` to verify that `record_finding` and `record_review_completed` were actually in the model's function list at dispatch time, and `skills[]` to record which skill version produced the review. Detects the "silent success" failure mode where a missing MCP server lets a run complete green without ever having the review tools available.
+>
+> **Note on builtin tool filtering:** The `source: "builtin"` entries in `tools[]` reflect the _instance-level superset_ registered in `ToolRegistry`. Per-model filters (e.g. `apply_patch` only on gpt-*, `codesearch`/`websearch` only for aictrl provider) are applied at dispatch time inside `resolveTools` and are **not** reflected here. Consumers should treat the presence of a builtin tool name as "registered and potentially available", not as "guaranteed to appear in the model's function list". The strong structural guarantee (tool present ↔ tool in dispatch list) applies only to `source: "mcp"` entries.
 
 ### `session_start`
 
diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts
index 86f9d91..2e54c69 100644
--- a/packages/cli/src/cli/cmd/run.ts
+++ b/packages/cli/src/cli/cmd/run.ts
@@ -708,13 +708,23 @@ export const RunCommand = cmd({
       // Emit the resolved tool catalog (builtin + MCP tools) and available
       // skills before the first model turn. Enables structural detection of
       // "tool was not exposed" failure modes (issue #85).
-      await buildToolCatalogItems()
-        .then(({ tools, skills }) => {
-          emit("tool_catalog", { tools, skills })
-        })
-        .catch(() => {
-          // Never let catalog collection block or crash the session.
-        })
+      // Only build the catalog when output is JSON — emit() is a no-op
+      // otherwise, and the catalog construction (MCP listTools + skill scan)
+      // adds unnecessary latency for interactive sessions.
+      if (args.format === "json") {
+        await buildToolCatalogItems()
+          .then(({ tools, skills }) => {
+            emit("tool_catalog", { tools, skills })
+          })
+          .catch((err) => {
+            // Never let catalog collection block or crash the session, but
+            // surface the failure so a missing tool_catalog event is diagnosable.
+            console.error(
+              "failed to emit tool_catalog",
+              err instanceof Error ? err.message : String(err),
+            )
+          })
+      }
 
       const loopDone = loop()
         .then(() => {
diff --git a/packages/cli/src/mcp/index.ts b/packages/cli/src/mcp/index.ts
index e48afe0..e84863d 100644
--- a/packages/cli/src/mcp/index.ts
+++ b/packages/cli/src/mcp/index.ts
@@ -600,6 +600,17 @@ export namespace MCP {
     s.status[name] = { status: "disabled" }
   }
 
+  /**
+   * Single source of truth for the MCP tool key used to identify a tool
+   * both in `tools()` (dispatch) and `toolEntries()` (catalog). Both paths
+   * must produce byte-identical keys so catalog consumers get correct answers.
+   */
+  function mcpToolKey(clientName: string, toolName: string): string {
+    const sanitizedClientName = clientName.replace(/[^a-zA-Z0-9_-]/g, "_")
+    const sanitizedToolName = toolName.replace(/[^a-zA-Z0-9_-]/g, "_")
+    return sanitizedClientName + "_" + sanitizedToolName
+  }
+
   export async function tools() {
     const result: Record<string, Tool> = {}
     const s = await state()
@@ -634,9 +645,7 @@ export namespace MCP {
       const entry = isMcpConfigured(mcpConfig) ? mcpConfig : undefined
       const timeout = entry?.timeout ?? defaultTimeout
       for (const mcpTool of toolsResult.tools) {
-        const sanitizedClientName = clientName.replace(/[^a-zA-Z0-9_-]/g, "_")
-        const sanitizedToolName = mcpTool.name.replace(/[^a-zA-Z0-9_-]/g, "_")
-        result[sanitizedClientName + "_" + sanitizedToolName] = await convertMcpTool(mcpTool, client, timeout)
+        result[mcpToolKey(clientName, mcpTool.name)] = await convertMcpTool(mcpTool, client, timeout)
       }
     }
     return result
@@ -647,8 +656,8 @@ export namespace MCP {
    * `tool_catalog` NDJSON event (issue #85). Each entry carries the tool key
    * (as exposed to the model) and the originating server name.
    *
-   * This is intentionally separate from `tools()` to avoid the overhead of
-   * building full AI-SDK tool objects just for catalog reporting.
+   * Uses `mcpToolKey()` — the same helper as `tools()` — so the key format is
+   * guaranteed identical to what `resolveTools` dispatches to the model.
    */
   export async function toolEntries(): Promise<{ toolKey: string; serverName: string }[]> {
     const result: { toolKey: string; serverName: string }[] = []
@@ -671,11 +680,9 @@ export namespace MCP {
 
     for (const { clientName, toolsResult } of toolsResults) {
       if (!toolsResult) continue
-      const sanitizedClientName = clientName.replace(/[^a-zA-Z0-9_-]/g, "_")
       for (const mcpTool of toolsResult.tools) {
-        const sanitizedToolName = mcpTool.name.replace(/[^a-zA-Z0-9_-]/g, "_")
         result.push({
-          toolKey: sanitizedClientName + "_" + sanitizedToolName,
+          toolKey: mcpToolKey(clientName, mcpTool.name),
           serverName: clientName,
         })
       }
diff --git a/packages/cli/test/tool-catalog.test.ts b/packages/cli/test/tool-catalog.test.ts
index ab892f9..c31bb86 100644
--- a/packages/cli/test/tool-catalog.test.ts
+++ b/packages/cli/test/tool-catalog.test.ts
@@ -280,4 +280,48 @@ describe("tool_catalog event helpers", () => {
       })
     })
   })
+
+  describe("buildToolCatalogItems — error propagation", () => {
+    test("rejects when a dep throws, so the caller catch can log it (regression for silent-swallow bug)", async () => {
+      // If buildToolCatalogItems swallowed errors itself, this would resolve.
+      // It must reject so callers have a chance to log before discarding.
+      const throwingDep = async (): Promise<{ toolKey: string; serverName: string }[]> => {
+        throw new Error("simulated MCP failure")
+      }
+
+      await using tmp = await tmpdir({ git: true })
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          await expect(
+            buildToolCatalogItems({ getMcpTools: throwingDep }),
+          ).rejects.toThrow("simulated MCP failure")
+        },
+      })
+    })
+  })
+
+  describe("MCP tool key format", () => {
+    test("special characters in server/tool names are sanitized to underscores", async () => {
+      await using tmp = await tmpdir({ git: true })
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          // Simulate a server name with special chars
+          const getMcpTools = async () => [
+            { toolKey: "my_server_my_tool", serverName: "my.server" },
+            { toolKey: "other__server_tool__name", serverName: "other/ server" },
+          ]
+
+          const { tools } = await buildToolCatalogItems({ getMcpTools })
+          const mcpTools = tools.filter((t: ToolCatalogEntry) => t.source === "mcp")
+
+          // Keys must match sanitized format: [a-zA-Z0-9_-] only
+          for (const t of mcpTools) {
+            expect(t.name).toMatch(/^[a-zA-Z0-9_-]+$/)
+          }
+        },
+      })
+    })
+  })
 })

From 8dae0c93471a5563f7f3c581c2023d8ef761dbb0 Mon Sep 17 00:00:00 2001
From: Bulat Yapparov <by4pparov@yandex.ru>
Date: Mon, 22 Jun 2026 10:45:16 +0100
Subject: [PATCH 4/4] fix(tool-catalog): address round-2 review findings on PR
 #88
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- EVENTS.md: reorder tool_catalog section after session_start to match
  actual emission order (bot finding: section placed before session_start)
- run.ts: wrap buildToolCatalogItems() with 10s timeout so a hanging MCP
  server cannot stall session start indefinitely; on failure emit a
  structured tool_catalog_error event to stdout instead of console.error
  to stderr, so JSON consumers can distinguish failure from empty catalog
- skill.ts: add comment clarifying that metadata/version are type-only
  fields not validated by the Info.pick() Zod parse — both are populated
  via manual Object.fromEntries coercion after parse succeeds
- test: add 3 regression tests for timeout, structured error path, and
  catalog rejection propagation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_0187MsfK1upr6K2BKVbmaebQ
---
 EVENTS.md                              | 40 +++++++++++-----------
 packages/cli/src/cli/cmd/run.ts        | 22 ++++++++----
 packages/cli/src/skill/skill.ts        |  8 +++++
 packages/cli/test/tool-catalog.test.ts | 46 ++++++++++++++++++++++++++
 4 files changed, 89 insertions(+), 27 deletions(-)

diff --git a/EVENTS.md b/EVENTS.md
index 5935d4e..707ea9f 100644
--- a/EVENTS.md
+++ b/EVENTS.md
@@ -14,6 +14,26 @@ The schema is versioned via `session_start.schemaVersion`. This document describ
 
 ## Lifecycle Events
 
+### `session_start`
+
+Emitted once when the session begins.
+
+```json
+{
+  "type": "session_start",
+  "schemaVersion": "1",
+  "model": "anthropic/claude-sonnet-4-20250514",
+  "agent": "default",
+  "permissions": [
+    { "permission": "bash", "pattern": "*", "action": "allow" },
+    { "permission": "write", "pattern": "*", "action": "ask" }
+  ]
+}
+```
+
+- `schemaVersion` (string, **required**) — pinned contract version. Currently `"1"`.
+- `permissions` (array, **required**) — the fully resolved permission ruleset for the session (merge of agent + session rules). In headless mode, any permission not matching an `allow` rule is auto-rejected.
+
 ### `tool_catalog`
 
 Emitted once per session, immediately after `session_start` and before the first model turn (present even for zero-turn or immediately-failing runs). Lists every tool and skill that was resolved for the session so consumers can structurally verify tool exposure without string-matching the model's prose.
@@ -53,26 +73,6 @@ Emitted once per session, immediately after `session_start` and before the first
 >
 > **Note on builtin tool filtering:** The `source: "builtin"` entries in `tools[]` reflect the _instance-level superset_ registered in `ToolRegistry`. Per-model filters (e.g. `apply_patch` only on gpt-*, `codesearch`/`websearch` only for aictrl provider) are applied at dispatch time inside `resolveTools` and are **not** reflected here. Consumers should treat the presence of a builtin tool name as "registered and potentially available", not as "guaranteed to appear in the model's function list". The strong structural guarantee (tool present ↔ tool in dispatch list) applies only to `source: "mcp"` entries.
 
-### `session_start`
-
-Emitted once when the session begins.
-
-```json
-{
-  "type": "session_start",
-  "schemaVersion": "1",
-  "model": "anthropic/claude-sonnet-4-20250514",
-  "agent": "default",
-  "permissions": [
-    { "permission": "bash", "pattern": "*", "action": "allow" },
-    { "permission": "write", "pattern": "*", "action": "ask" }
-  ]
-}
-```
-
-- `schemaVersion` (string, **required**) — pinned contract version. Currently `"1"`.
-- `permissions` (array, **required**) — the fully resolved permission ruleset for the session (merge of agent + session rules). In headless mode, any permission not matching an `allow` rule is auto-rejected.
-
 ### `session_complete`
 
 Emitted once when the session ends (success or failure).
diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts
index 2e54c69..fa9ec9b 100644
--- a/packages/cli/src/cli/cmd/run.ts
+++ b/packages/cli/src/cli/cmd/run.ts
@@ -5,6 +5,7 @@ import { UI } from "../ui"
 import { cmd } from "./cmd"
 import { classifySessionError, SCHEMA_VERSION } from "./run.errors"
 import { buildToolCatalogItems } from "./tool-catalog"
+import { withTimeout } from "../../util/timeout"
 import { Flag } from "../../flag/flag"
 import { bootstrap } from "../bootstrap"
 import { EOL } from "os"
@@ -711,18 +712,25 @@ export const RunCommand = cmd({
       // Only build the catalog when output is JSON — emit() is a no-op
       // otherwise, and the catalog construction (MCP listTools + skill scan)
       // adds unnecessary latency for interactive sessions.
+      //
+      // Bounded by TOOL_CATALOG_TIMEOUT_MS so a hanging MCP server cannot
+      // indefinitely stall session start. On timeout or error the failure is
+      // emitted as a structured stdout event (tool_catalog_error) so JSON
+      // consumers can distinguish "catalog failed" from "catalog succeeded with
+      // no tools" — stderr is invisible to consumers reading the NDJSON stream.
+      const TOOL_CATALOG_TIMEOUT_MS = 10_000
       if (args.format === "json") {
-        await buildToolCatalogItems()
+        await withTimeout(buildToolCatalogItems(), TOOL_CATALOG_TIMEOUT_MS)
           .then(({ tools, skills }) => {
             emit("tool_catalog", { tools, skills })
           })
           .catch((err) => {
-            // Never let catalog collection block or crash the session, but
-            // surface the failure so a missing tool_catalog event is diagnosable.
-            console.error(
-              "failed to emit tool_catalog",
-              err instanceof Error ? err.message : String(err),
-            )
+            // Never let catalog collection block or crash the session.
+            // Emit a structured event so consumers reading stdout can detect
+            // the failure rather than silently missing the tool_catalog event.
+            emit("tool_catalog_error", {
+              error: err instanceof Error ? err.message : String(err),
+            })
           })
       }
 
diff --git a/packages/cli/src/skill/skill.ts b/packages/cli/src/skill/skill.ts
index c2e17d0..0084e59 100644
--- a/packages/cli/src/skill/skill.ts
+++ b/packages/cli/src/skill/skill.ts
@@ -25,11 +25,19 @@ export namespace Skill {
      * All frontmatter keys beyond `name`/`description`, with values
      * string-coerced. Retains arbitrary skill metadata (e.g. version, license,
      * author) without requiring per-field changes to the schema.
+     *
+     * NOTE: `metadata` and `version` below are TYPE-ONLY fields — they are NOT
+     * validated by the `Info.pick({ name, description })` Zod parse in `addSkill`.
+     * Both are populated manually from raw frontmatter after the parse succeeds
+     * (see the `Object.fromEntries` block below). The schema fields exist solely
+     * to give `Skill.Info` a correct TypeScript type; runtime coercion is
+     * intentional to handle e.g. `version: 1.0` (YAML number) → `"1.0"` (string).
      */
     metadata: z.record(z.string(), z.string()).default({}),
     /**
      * Convenience accessor derived from `metadata.version`.
      * undefined when the frontmatter has no `version` key.
+     * TYPE-ONLY — see note on `metadata` above.
      */
     version: z.string().optional(),
   })
diff --git a/packages/cli/test/tool-catalog.test.ts b/packages/cli/test/tool-catalog.test.ts
index c31bb86..8b4df1e 100644
--- a/packages/cli/test/tool-catalog.test.ts
+++ b/packages/cli/test/tool-catalog.test.ts
@@ -299,6 +299,52 @@ describe("tool_catalog event helpers", () => {
         },
       })
     })
+
+    test("withTimeout wrapping surfaces a timed-out catalog as a rejection (regression: hanging MCP server must not stall session start)", async () => {
+      // This test verifies that when buildToolCatalogItems is wrapped with
+      // withTimeout (as done in run.ts), a never-resolving dep causes the
+      // outer promise to reject — NOT to hang indefinitely. The session-start
+      // guard in run.ts catches this and emits tool_catalog_error to stdout.
+      const { withTimeout } = await import("../src/util/timeout")
+
+      const neverResolvingDep = (): Promise<{ toolKey: string; serverName: string }[]> =>
+        new Promise(() => {
+          // intentionally never resolves or rejects — simulates a hanging MCP client
+        })
+
+      await using tmp = await tmpdir({ git: true })
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          await expect(
+            withTimeout(buildToolCatalogItems({ getMcpTools: neverResolvingDep }), 50),
+          ).rejects.toThrow(/timed out/i)
+        },
+      })
+    })
+
+    test("structured error path: catalog failure produces diagnosable error message (regression: console.error to stdout event)", async () => {
+      // Verifies that the rejection from buildToolCatalogItems carries enough
+      // information for the caller to construct a structured tool_catalog_error event.
+      // The key requirement: error.message must be a non-empty string.
+      const throwingDep = async (): Promise<{ toolKey: string; serverName: string }[]> => {
+        throw new Error("MCP server connection refused")
+      }
+
+      await using tmp = await tmpdir({ git: true })
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          let capturedMessage: string | undefined
+          await buildToolCatalogItems({ getMcpTools: throwingDep }).catch((err) => {
+            capturedMessage = err instanceof Error ? err.message : String(err)
+          })
+          expect(capturedMessage).toBeDefined()
+          expect(typeof capturedMessage).toBe("string")
+          expect(capturedMessage!.length).toBeGreaterThan(0)
+        },
+      })
+    })
   })
 
   describe("MCP tool key format", () => {