From 7419f869ad470c8973a463f007893b9407fdc249 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Mon, 29 Jun 2026 10:35:13 +0000 Subject: [PATCH 1/5] =?UTF-8?q?feat(examples,docs):=20add=20the=20referenc?= =?UTF-8?q?e=20host/server=20pair=20=E2=80=94=20cli-client=20and=20todos-s?= =?UTF-8?q?erver?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit examples/cli-client is a complete LLM-connected MCP host: an interactive chat CLI with no built-in tools, where everything comes from the servers it connects to (a URL via --server with OAuth on 401, a spawned command line, or an mcpServers-style config). The model sits behind a small LLMProvider seam with Scripted (keyless, used by CI), Anthropic, OpenAI, and Gemini implementations that resolve the latest mid-tier model from each provider's models API. The host wires the full client feature surface: namespaced tool loop, @-mention resources as provenance-labelled context, /watch subscriptions on both protocol eras, prompts as slash commands with tab completion backed by completion/complete, approval-gated sampling, schema-driven elicitation forms, roots, per-call progress and server logs, Ctrl-C cancellation, and full OAuth (PKCE, dynamic client registration, state-checked loopback callback). examples/todos-server is the workload it pairs with: a todo board serving both protocol revisions from one codebase over stdio and Streamable HTTP, where every server feature has a real job — CRUD tools (one with structuredContent), sampling- backed prioritize, an elicitation-confirmed bulk delete, a multi-round brainstorm flow whose requestState is a step-discriminated union signed via createRequestStateCodec, paced progress with cancellation observation, request-tied logging, resources with a completable template, and per-resource subscriptions. 
cli-client/client.ts replays a scripted conversation against todos-server as a self-verifying run:examples story across both transports and eras, asserting the loop, sampling, the multi-round + signed-state flow, completions, cancellation, progress, logging, and subscriptions end to end; story-local vitest covers the provider mappings, routing, config parsing, forms, and OAuth helpers. docs/host-integration.md is the companion "Building a host" guide: who should (and should not) build a host, the provider seam and the tool loop (snippets synced from the example source), then per-feature guidance narrated against the pair. --- docs/documents.md | 2 + docs/host-integration.md | 293 +++++++++ examples/README.md | 42 +- examples/cli-client/.gitignore | 1 + examples/cli-client/README.md | 145 +++++ examples/cli-client/cli.ts | 154 +++++ examples/cli-client/client.ts | 48 ++ examples/cli-client/config.example.json | 8 + examples/cli-client/host/auth.ts | 200 ++++++ examples/cli-client/host/config.ts | 115 ++++ examples/cli-client/host/content.ts | 98 +++ examples/cli-client/host/host.ts | 528 +++++++++++++++ examples/cli-client/host/loop.ts | 270 ++++++++ examples/cli-client/host/naming.ts | 22 + examples/cli-client/host/ui.ts | 377 +++++++++++ examples/cli-client/package.json | 33 + examples/cli-client/providers/anthropic.ts | 141 ++++ examples/cli-client/providers/gemini.ts | 127 ++++ examples/cli-client/providers/openai.ts | 139 ++++ examples/cli-client/providers/provider.ts | 73 +++ examples/cli-client/providers/scripted.ts | 46 ++ examples/cli-client/script/scriptedUi.ts | 83 +++ examples/cli-client/script/session.ts | 343 ++++++++++ examples/cli-client/server.ts | 6 + examples/cli-client/test/auth.test.ts | 142 ++++ examples/cli-client/test/forms.test.ts | 64 ++ examples/cli-client/test/host.test.ts | 96 +++ examples/cli-client/test/providers.test.ts | 168 +++++ examples/cli-client/vitest.config.js | 8 + examples/todos-server/README.md | 81 +++ 
examples/todos-server/package.json | 22 + examples/todos-server/server.ts | 29 + examples/todos-server/todos.ts | 713 +++++++++++++++++++++ pnpm-lock.yaml | 332 ++++++++++ 34 files changed, 4930 insertions(+), 19 deletions(-) create mode 100644 docs/host-integration.md create mode 100644 examples/cli-client/.gitignore create mode 100644 examples/cli-client/README.md create mode 100644 examples/cli-client/cli.ts create mode 100644 examples/cli-client/client.ts create mode 100644 examples/cli-client/config.example.json create mode 100644 examples/cli-client/host/auth.ts create mode 100644 examples/cli-client/host/config.ts create mode 100644 examples/cli-client/host/content.ts create mode 100644 examples/cli-client/host/host.ts create mode 100644 examples/cli-client/host/loop.ts create mode 100644 examples/cli-client/host/naming.ts create mode 100644 examples/cli-client/host/ui.ts create mode 100644 examples/cli-client/package.json create mode 100644 examples/cli-client/providers/anthropic.ts create mode 100644 examples/cli-client/providers/gemini.ts create mode 100644 examples/cli-client/providers/openai.ts create mode 100644 examples/cli-client/providers/provider.ts create mode 100644 examples/cli-client/providers/scripted.ts create mode 100644 examples/cli-client/script/scriptedUi.ts create mode 100644 examples/cli-client/script/session.ts create mode 100644 examples/cli-client/server.ts create mode 100644 examples/cli-client/test/auth.test.ts create mode 100644 examples/cli-client/test/forms.test.ts create mode 100644 examples/cli-client/test/host.test.ts create mode 100644 examples/cli-client/test/providers.test.ts create mode 100644 examples/cli-client/vitest.config.js create mode 100644 examples/todos-server/README.md create mode 100644 examples/todos-server/package.json create mode 100644 examples/todos-server/server.ts create mode 100644 examples/todos-server/todos.ts diff --git a/docs/documents.md b/docs/documents.md index 65cff9749c..22721e6cde 100644 --- 
a/docs/documents.md +++ b/docs/documents.md @@ -5,6 +5,7 @@ children: - ./server.md - ./client-quickstart.md - ./client.md + - ./host-integration.md - ./faq.md --- @@ -14,4 +15,5 @@ children: - [Server](./server.md) – building MCP servers: transports, tools, resources, prompts, server-initiated requests, and deployment - [Client Quickstart](./client-quickstart.md) – build an LLM-powered chatbot that connects to an MCP server and calls its tools - [Client](./client.md) – building MCP clients: connecting, tools, resources, prompts, server-initiated requests, and error handling +- [Building a Host](./host-integration.md) – turning the client APIs into an application: the tool loop, resources as context, prompts as commands, sampling, elicitation, roots, and auth, walked through `examples/cli-client` - [FAQ](./faq.md) – frequently asked questions and troubleshooting diff --git a/docs/host-integration.md b/docs/host-integration.md new file mode 100644 index 0000000000..4277b0af4a --- /dev/null +++ b/docs/host-integration.md @@ -0,0 +1,293 @@ +--- +title: Building a Host +--- + +# Building a host + +A _host_ is the application that sits between users, a language model, and MCP servers: Claude, an IDE, a chat product, an internal tool with its own UI, a custom agent runtime. The SDK gives you the protocol verbs (`listTools`, `callTool`, `readResource`, …); this guide covers the part the protocol deliberately leaves to you — the application behaviors that turn those verbs into something a user can feel: tools the model actually calls, resources that become context, prompts that become commands, sampling and elicitation that round-trip through your UI. + +Everything here is narrated against [`examples/cli-client`](https://github.com/modelcontextprotocol/typescript-sdk/tree/main/examples/cli-client) — a minimal but complete host you can run, read, and copy from. 
For the protocol-level concepts behind each feature, see the spec site's [client concepts](https://modelcontextprotocol.io/docs/learn/client-concepts) and [architecture](https://modelcontextprotocol.io/docs/learn/architecture) pages; this guide does not restate them. + +## Do you actually need to build a host? + +Most applications should not hand-roll this layer. Pick the first row that matches and stop there: + +| You are… | Do this instead | +| ---------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | +| Building an MCP **server** | Start at the [server quickstart](./server-quickstart.md) — you never write a host. | +| Bringing tools into **an existing host** (Claude, ChatGPT, Cursor, an IDE) | Configure your server in that host; read its docs. Nothing to build. | +| Calling a model API that offers a **hosted MCP connector** | Use the provider's connector and pass it your server's URL — the provider runs the loop for you. | +| Building on an **agent framework** that already speaks MCP (Claude Agent SDK, Vercel AI SDK, Pydantic AI, …) | Use the framework's MCP support; it owns the loop and the feature wiring. | +| Building the application that owns the conversation — an IDE, a chat product, an internal tool, your own agent runtime | You are building a host. Keep reading. | + +The narrow audience of this guide is the one that decides whether anything beyond tools ever gets used: hosts are where resources, prompts, sampling, and elicitation either become product features or stay dead protocol surface. + +## The mental model + +A host is a conduit between a model and servers it does not trust: + +1. Discover what each configured server offers (`tools/list`, `resources/list`, `prompts/list`). +2. Hand the model the tool definitions, namespaced per server. +3. 
Execute the tool calls the model makes — against the server that owns them — and feed the results back, verbatim and labelled. +4. Surface everything that needs a human (sampling approval, elicitation forms, OAuth) through your UI. + +The model never talks to a server directly; your host is the only thing that does. That makes the host responsible for the two judgement calls the protocol cannot make for you: _what the model gets to see_ (context, truncation, provenance) and _what the user gets to approve_ (sampling, destructive actions, credentials). + +cli-client's shape, which this guide walks through: + +```text +examples/cli-client/ (paired with examples/todos-server, the reference server) + cli.ts interactive entry host/host.ts connections, routing, handlers + client.ts scripted CI entry host/loop.ts the conversation loop + providers/ the LLM provider seam host/auth.ts OAuth for protected servers + host/ui.ts terminal UI + elicitation forms +``` + +## The provider seam + +The single most useful structural decision in a host is a thin interface between "the conversation" and "whatever model API you use". In cli-client that seam is `LLMProvider`: + +```ts source="../examples/cli-client/providers/provider.ts#llmProvider" +export interface ToolDefinition { + /** Namespaced tool name as exposed to the model (e.g. `mcp__todos__add_task`). */ + name: string; + description?: string; + /** JSON Schema for the tool's arguments, passed through from the MCP `Tool.inputSchema`. */ + inputSchema: Record<string, unknown>; +} + +export type ContentPart = { type: 'text'; text: string } | { type: 'image'; mimeType: string; data: string }; + +export interface ToolCall { + /** Provider-assigned id, echoed back on the matching `role: 'tool'` message. */ + id: string; + /** Namespaced tool name (matches a `ToolDefinition.name`). 
*/ + name: string; + arguments: Record<string, unknown>; +} + +export type ChatMessage = + | { role: 'user'; content: ContentPart[] } + | { role: 'assistant'; content: ContentPart[]; toolCalls?: ToolCall[] } + | { role: 'tool'; toolCallId: string; toolName: string; content: ContentPart[]; isError?: boolean }; + +export interface GenerateRequest { + system?: string; + messages: ChatMessage[]; + tools?: ToolDefinition[]; + maxTokens?: number; + temperature?: number; +} + +export interface GenerateResult { + /** Assistant prose (may be empty when the model only calls tools). */ + text: string; + /** Tool calls the host must execute and feed back as `role: 'tool'` messages. */ + toolCalls: ToolCall[]; + stopReason: 'end_turn' | 'tool_use' | 'max_tokens' | 'other'; + /** Provider-reported model id (also used to answer MCP sampling requests). */ + model: string; +} + +export interface LLMProvider { + readonly name: string; + generate(request: GenerateRequest): Promise<GenerateResult>; +} +``` + +Two things make this seam earn its keep: + +- **MCP tool definitions pass through it untouched.** `Tool.inputSchema` is already JSON Schema; every major provider accepts it as-is (`input_schema` for the Anthropic Messages API, `function.parameters` for Chat Completions, `parametersJsonSchema` for Gemini). The per-provider files in [`providers/`](https://github.com/modelcontextprotocol/typescript-sdk/tree/main/examples/cli-client/providers) are each a complete, copyable mapping; the differences (where tool results go, how errors are flagged, what ids look like) are exactly the part worth reading once. +- **It serves both directions.** The chat loop calls it to drive the conversation, and the MCP sampling handler calls it to answer servers — one model integration, two consumers. + +Keep the seam in your application. It is deliberately _not_ an SDK package: the SDK stays a protocol library, and your host's message shapes belong to your host. + +## The loop (tools) + +Nothing in MCP runs the conversation for you. 
The loop every host writes: + +```ts source="../examples/cli-client/host/loop.ts#theLoop" +export async function runModelRounds(session: ChatSession): Promise<void> { + const { host, provider, ui } = session; + // Server instructions and the aggregated tool list are stable within a single user turn. + const system = buildSystemPrompt(host); + const tools = host.toolDefinitions(); + for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { + const stopSpinner = ui.spinner(); + let result: GenerateResult; + try { + result = await provider.generate({ + system, + messages: session.messages, + tools, + maxTokens: session.maxTokens + }); + } finally { + stopSpinner(); + } + session.messages.push({ + role: 'assistant', + content: result.text ? [textPart(result.text)] : [], + ...(result.toolCalls.length > 0 ? { toolCalls: result.toolCalls } : {}) + }); + if (result.model !== session.announcedModel) { + session.announcedModel = result.model; + ui.status(`model: ${result.model}`); + } + if (result.text) ui.print(result.text); + if (result.toolCalls.length === 0) return; + + for (const call of result.toolCalls) { + ui.status(`→ ${call.name} ${JSON.stringify(call.arguments)}`); + // Long-running calls stay cancellable: Ctrl-C aborts this call (the SDK sends + // notifications/cancelled) and the failure goes back to the model like any other. + const cancellation = new AbortController(); + ui.setCancelHandler(() => { + ui.status(`cancelling ${call.name}…`, 'cancel'); + cancellation.abort(); + }); + let parts: ContentPart[]; + let isError: boolean; + try { + ({ parts, isError } = await host.executeToolCall(call, { signal: cancellation.signal })); + } finally { + ui.setCancelHandler(undefined); + } + const summary = partsToDisplayText(parts); + ui.status(`${isError ? '✗' : '✓'} ${call.name}: ${summary.length > 200 ? 
`${summary.slice(0, 200)}…` : summary}`); + session.messages.push({ role: 'tool', toolCallId: call.id, toolName: call.name, content: parts, isError }); + } + } + ui.print('(stopped: tool-call round limit reached)'); +} +``` + +The details that separate a working loop from a frustrating one: + +- **Namespace per server.** cli-client exposes every tool as `mcp__<server>__<tool>` (`host/naming.ts`), so two servers can both ship `search` and a model-issued call always routes back to the server that owns it. Sanitize server names first — provider APIs restrict tool-name characters. +- **Handle every tool call in the round, then loop.** Models issue parallel calls; execute them all and send all the results back before asking for the next turn (the Anthropic mapping additionally requires the results to share one user message — see `providers/anthropic.ts`). +- **`isError` is a result, not an exception.** Mark it as an error in the provider's format and let the model read the message — it is allowed to try something else. A _thrown_ error from `callTool` (unknown tool, timeout, lost connection) is different in kind, but the model should see that as a failed call too. +- **Narrow content blocks; never assume text.** Tool results can carry text, images, audio, resource links, and embedded resources (`host/content.ts` shows the full narrowing). Pass images through if your provider mapping supports them; reduce the rest to labelled placeholders. +- **Truncation is your job.** Neither the SDK nor the protocol caps what a server returns. cli-client caps everything it injects at 50k characters; pick a budget and state it. +- **Bound the loop.** A model that keeps calling tools forever is a bug; cap the rounds and say so when you hit the cap. +- **Fold `getInstructions()` into the system prompt** — server instructions exist precisely so the host can pass them to the model. 
+ +> **What real hosts do** — Claude Code uses the same `mcp__<server>__<tool>` namespacing and feeds `isError` results back to the model as errors. Almost nobody in the wider ecosystem shows the loop itself: most SDK examples stop at "list tools and print them", and frameworks bury the loop inside middleware. It is one screen of code; write it once, visibly. + +## Resources become context + +Resources are **application-driven**: the protocol gives you list/read and deliberately does not say when to read. Three patterns cover real hosts: + +1. **User-driven (what cli-client implements).** The user names a resource — `@todos:todos://board what should I tackle first?` — the host calls `readResource`, and injects the contents as a context block _with provenance_: + + ```text + + …contents… + + The user attached this MCP resource as context. Use it to answer; do not re-read it unless told it changed. + ``` + + Label where the content came from, cap its size, replace binary contents with a placeholder, and tell the model not to re-fetch. Use `listResources()` (and resource templates plus `complete()`) to power the picker UX, and the client's `listChanged` option to keep the cached list fresh. To watch a specific resource, subscribe to it — `resources/subscribe` on 2025-era connections, `client.listen({ resourceSubscriptions: [uri] })` on 2026-07-28 — and react to `notifications/resources/updated`; cli-client exposes this as `/watch @server:uri`. + +2. **Auto-attach policies.** Some hosts attach certain resources to every conversation (an "active document", a project manifest) based on their own rules. Same mechanics as above — the policy is the only new part. + +3. **Model-driven (resources as tools).** If you want the _model_ to decide what to read, expose two synthetic tools — `list_resources(server?)` and `read_resource(server, uri)` — that call `listResources`/`readResource` under the hood. 
Register them only when at least one connected server actually declares the `resources` capability, apply the same size cap, and treat "not found" as a soft error that tells the model to re-list. This is the pattern to reach for when users won't know URIs but the task needs server data. + +> **What real hosts do** — Claude Code implements the user-driven path (`@server:uri`) _and_ the model-driven fallback (`ListMcpResources` / `ReadMcpResource` tools, registered only when a server declares resources, with a 100k-character cap), does not implement `resources/subscribe`, and relies on `list_changed` to invalidate its cached list. + +## Prompts become commands + +Prompts are user-invoked workflows. The host's job is small and concrete: + +- Surface each prompt as a command — cli-client uses `/server:prompt-name key=value …` — listing `prompt.arguments` so the user knows what to supply, and prompting for missing required arguments (`complete()` can power autocompletion for argument values). +- Call `getPrompt` and **append the returned messages to the conversation as separate turns, keeping their roles**. A prompt's value is often exactly that it seeds a multi-turn shape (context as a user turn, a primed assistant turn, then the ask); flattening it into one block of text throws that away. +- Then run the loop — the seeded conversation usually ends with something for the model to do. + +> **What real hosts do** — Claude Code exposes every server prompt as a slash command, but flattens the returned messages into a single hidden user message, discarding the roles. Keep the roles; it costs nothing and is what the shape is for. + +## Sampling: the server borrows your model + +`sampling/createMessage` is a server asking the _host's_ model to run a completion — so servers can ship LLM-powered features without shipping API keys. 
The host decides whether and how: + +```ts +client.setRequestHandler('sampling/createMessage', async request => { + const params = request.params; + const approved = await ui.confirm(`Server "${name}" wants to run an LLM request (${params.maxTokens} max tokens): "${preview(params)}". Allow?`); + if (!approved) { + throw new ProtocolError(ProtocolErrorCode.InvalidRequest, 'User declined the sampling request'); + } + const result = await provider.generate({ + system: params.systemPrompt, + messages: params.messages.map(toChatMessage), + maxTokens: Math.min(params.maxTokens, SAMPLING_MAX_TOKENS_CAP) + }); + return { role: 'assistant', content: { type: 'text', text: result.text }, model: result.model, stopReason: 'endTurn' }; +}); +``` + +The three host responsibilities, in order of importance: + +1. **Gate it on the user.** A sampling request spends the user's tokens and can carry data to a third-party API. Show what the server asked and require an explicit yes; treat "no answer" as no. Cap `maxTokens` regardless of what was requested. +2. **Route it through the same provider as the chat.** That is the entire point — one model integration serves both the conversation and the servers (todos-server's `prioritize` and `brainstorm_tasks` tools both work this way through cli-client). +3. **Decline by omission, not by error.** If your host will not support sampling, simply do not declare the `sampling` capability — servers can check for it and fall back. Do not declare it and then reject every request. + +Declare the capability in the client constructor and register the handler once; the SDK carries the request over both protocol revisions (as a server→client request on 2025-era connections, and via `input_required` results on 2026-07-28 connections) without any era-specific code in your handler. 
Note that 2025-era push-style sampling needs a sessionful server when running over Streamable HTTP, and that as of the 2026-07-28 revision sampling is in a deprecation window (see the spec's versioning notes) — supported, but check the spec status before making it load-bearing. + +> **What real hosts do** — Claude Code does not declare the sampling capability at all (it has its own model loop and declines by omission). The C# SDK and FastMCP both ship "sampling handler backed by your chat client" helpers — evidence that when a host does say yes, wiring it to the existing provider is the established shape. + +## Elicitation: the server asks your user + +Elicitation is the inverse of sampling: the server needs _the human_, not the model. Two modes arrive at the same handler: + +- **Form mode** carries `message` plus a flat `requestedSchema` (strings, numbers, booleans, enums — no nesting). Generate UI from it: cli-client walks the properties and asks one question per field in the terminal (`host/ui.ts`), validating against the declared type before accepting. +- **URL mode** carries a URL the user must visit (payment, OAuth-style consent, anything that should not pass through the host). Show it, let the user open it, confirm when done. + +Return exactly one of the three outcomes and mean it: `accept` (with the collected content), `decline` (the user said no), `cancel` (the user dismissed it). Decline and cancel are answers, not retries — a server that re-asks on decline is a bug, and a host that maps errors to `accept` is a worse one. cli-client fails closed: any error in form collection becomes `cancel`. + +## Roots + +Roots tell servers which directories the conversation is about. Derive them from something real — the workspace folders, a `--root` flag, the cwd — declare the `roots` capability, answer `roots/list`, and send `roots/list_changed` when the set changes (on 2025-era connections; 2026-07-28 servers re-request roots when they need them). 
cli-client keeps this to a dozen lines in `host/host.ts` plus a `/root add` command; it is the cheapest feature in the protocol to support properly. Like sampling and logging, roots is in the 2026-07-28 deprecation window (SEP-2577) — supported throughout the window, with paths passed as tool parameters or configuration as the long-term direction. + +## Logging and progress + +- **Progress** is the live channel: pass `onprogress` on long-running `callTool` calls and render it (a status line is enough). It also gives you per-call attribution when the model runs tools in parallel. (todos-server demonstrates it with `work_through_tasks` — say "do all my tasks" and watch the status line.) +- **Cancellation** is the other half of long-running calls: pass an `AbortSignal` in the call's `RequestOptions` and abort it to cancel — the SDK sends `notifications/cancelled`, the local call rejects, and a well-behaved server checks `ctx.mcpReq.signal` and stops working. cli-client wires Ctrl-C during a tool call to exactly this; try it mid-way through "do all my tasks". +- **Logging**: render `notifications/message` as it arrives, tagged with the server name. On 2025-era connections call `setLoggingLevel(...)` once per server to opt in; on 2026-07-28 connections log delivery is opted into per request via the `io.modelcontextprotocol/logLevel` `_meta` key (and MCP-level logging is in a deprecation window). Whatever the era: a stdio server's `stderr` is also worth surfacing — that is where well-behaved servers put their own diagnostics. + +## Connecting, configuration, and auth + +Hosts conventionally read an `mcpServers`-shaped config (cli-client's `host/config.ts`): + +- `{ command, args, env?, cwd? }` entries are spawned as child processes speaking stdio. Pass the child a minimal environment plus exactly what the entry lists — never your host's full environment; your API keys live there. +- `{ url, headers? }` entries connect over Streamable HTTP. 
Support `${VAR}` interpolation so tokens stay in the environment, not the file. +- Adding a server to the config is an act of trust: it sees whatever the model sends it and its results go straight into the model's context. Say that in your own docs. + +For protected HTTP servers, two tiers cover almost everything: + +1. **Static credentials** — a bearer token or API key in `headers`. One line of config, no flow. +2. **OAuth** — when a server answers 401, the SDK drives discovery, dynamic client registration, PKCE, and token exchange through an `OAuthClientProvider` you supply ([client guide → Authentication](./client.md#authentication)). The host's share of the work (`host/auth.ts`): ask the user before opening a browser, run a loopback callback server, **verify the `state` parameter yourself** (the SDK does not), call `finishAuth()` on the transport that got the 401, then reconnect on a fresh transport with the same provider. Keep tokens in memory or in the platform keychain. + +## Going further + +Patterns worth knowing about once the basics work — none of them are in cli-client's code, deliberately: + +- **Progressive discovery.** Hosts with many servers should not dump every tool into every request: filter by the conversation (per-server enable/disable, model-visible tool search, or a cheap relevance pass), lean on `server.getInstructions()` to tell the model what a server is for, and use the client's `listChanged` tracking to refresh lazily instead of re-listing on every turn. +- **Programmatic tool calling.** Nothing requires a model in the loop: `callTool` is just an API call, so hosts can run MCP tools from code — scheduled jobs, slash commands that hit a tool directly, or letting the model write code that calls tools through an execution environment instead of one round trip per call. The same namespacing and result-handling rules apply; only the caller changes. 
+- **Automatic resource loading.** The model-driven fallback from the resources section — list/read exposed as tools — is the simplest way to let the model pull in server data it was not handed up front. + +## Security responsibilities of a host + +A host sits between untrusted servers, a user's credentials, and a model that does what its context tells it. The short list cli-client implements and the guide above assumes: + +- Treat every server-provided string as untrusted input: strip terminal escape sequences before rendering, label injected content with its origin, and cap its size. +- Gate sampling on explicit user approval and cap its token spend; gate browser-opening (OAuth, URL elicitation) the same way. +- Never hand a child server process your full environment, and keep API keys out of config files (`${VAR}` interpolation exists for this). +- Validate the OAuth `state` parameter, only hand `https:` (or loopback) authorization URLs to the browser, and never render attacker-controllable error descriptions from callbacks. +- Treat tool annotations (`readOnlyHint`, `destructiveHint`) as hints for UX, never as a security boundary. + +## See also + +- [`examples/cli-client`](https://github.com/modelcontextprotocol/typescript-sdk/tree/main/examples/cli-client) — the example this guide walks through; its README lists a scripted tour. +- [Client guide](./client.md) — the per-API reference for everything used here (connecting, auth, tools, resources, prompts, handlers, errors). +- [Client quickstart](./client-quickstart.md) — the smallest possible LLM-connected client (tools only, one server); cli-client is what it grows into. +- [Spec: client concepts](https://modelcontextprotocol.io/docs/learn/client-concepts) — the protocol-level view of the features wired here. 
diff --git a/examples/README.md b/examples/README.md index 9e1441fff2..89ee80f756 100644 --- a/examples/README.md +++ b/examples/README.md @@ -16,6 +16,8 @@ pnpm --filter @mcp-examples/ client -- --http http://127.0.0.1:3000/mcp Add `-- --legacy` to the client command for the 2025-era handshake. +The one exception to the generic commands is the reference pair: [`cli-client/`](./cli-client/README.md) and [`todos-server/`](./todos-server/README.md) have their own entry points (`pnpm --filter @mcp-examples/cli-client start`, `pnpm --filter @mcp-examples/todos-server start:http`) — see their READMEs. + ## Start here | Story | What it teaches | @@ -27,25 +29,27 @@ Add `-- --legacy` to the client command for the 2025-era handshake. ## Feature stories -| Story | What it teaches | Transports | Era | -| ------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | ------------ | -------------- | -| [`mrtr/`](./mrtr/README.md) | Multi-round-trip write-once tool, secure `requestState` | stdio + http | modern | -| [`subscriptions/`](./subscriptions/README.md) | `subscriptions/listen`: `client.listen()` + auto-open, `handler.notify` / `ServerEventBus` | stdio + http | modern | -| [`streaming/`](./streaming/README.md) | In-flight progress, logging, cancellation | stdio + http | dual | -| [`elicitation/`](./elicitation/README.md) | Elicitation (form + URL mode), both eras: push-style on 2025, `inputRequired` on 2026 | stdio + http | dual | -| [`sampling/`](./sampling/README.md) | Tool that requests LLM sampling from the client, both eras: push-style on 2025, `inputRequired` on 2026 | stdio + http | dual | -| [`stickynotes/`](./stickynotes/README.md) | "Real app" capstone: tools mutate state, a resource per note, listChanged, elicitation-confirmed clear | stdio + http | dual | -| [`caching/`](./caching/README.md) | `cacheHints` stamping on cacheable results 
(2026-07-28) | stdio + http | modern | -| [`gateway/`](./gateway/README.md) | `connect({ prior })` — probe once, zero-round-trip connect for every worker (gateway pattern) | http | modern | -| [`custom-methods/`](./custom-methods/README.md) | Vendor-prefixed methods + custom notifications | stdio + http | dual | -| [`schema-validators/`](./schema-validators/README.md) | ArkType, Valibot, Zod, and `outputSchema` | stdio + http | dual | -| [`custom-version/`](./custom-version/README.md) | `supportedProtocolVersions` / version negotiation | stdio + http | legacy | -| [`parallel-calls/`](./parallel-calls/README.md) | Multiple clients / parallel tool calls, per-client notifications | stdio + http | dual | -| [`legacy-routing/`](./legacy-routing/README.md) | `isLegacyRequest` in front of an existing sessionful 1.x deployment + a strict modern entry on one port | http | dual (in-body) | -| [`bearer-auth/`](./bearer-auth/README.md) | Resource server with bearer token; `401` + `WWW-Authenticate` | http | dual | -| [`oauth/`](./oauth/README.md) | OAuth `authorization_code`: in-repo AS (auto-consent) + headless redirect-following client | http | dual | -| [`oauth-client-credentials/`](./oauth-client-credentials/README.md) | OAuth `client_credentials` (machine-to-machine): in-repo AS + `ClientCredentialsProvider` | http | dual | -| [`scoped-tools/`](./scoped-tools/README.md) | Per-tool scope on `createMcpHandler` — bearer-verify gate + handler-level `ctx.http?.authInfo` checks | http | modern | +| Story | What it teaches | Transports | Era | +| ------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------ | -------------- | +| [`mrtr/`](./mrtr/README.md) | Multi-round-trip write-once tool, secure `requestState` | stdio + http | modern | +| [`subscriptions/`](./subscriptions/README.md) | 
`subscriptions/listen`: `client.listen()` + auto-open, `handler.notify` / `ServerEventBus` | stdio + http | modern | +| [`streaming/`](./streaming/README.md) | In-flight progress, logging, cancellation | stdio + http | dual | +| [`elicitation/`](./elicitation/README.md) | Elicitation (form + URL mode), both eras: push-style on 2025, `inputRequired` on 2026 | stdio + http | dual | +| [`sampling/`](./sampling/README.md) | Tool that requests LLM sampling from the client, both eras: push-style on 2025, `inputRequired` on 2026 | stdio + http | dual | +| [`stickynotes/`](./stickynotes/README.md) | "Real app" capstone: tools mutate state, a resource per note, listChanged, elicitation-confirmed clear | stdio + http | dual | +| [`cli-client/`](./cli-client/README.md) | **Reference host**: LLM chat CLI with provider seam — tool loop, @-mention resources, prompt commands, sampling, elicitation, roots, OAuth, cancellation | stdio + http | dual | +| [`todos-server/`](./todos-server/README.md) | **Reference server** (pairs with cli-client): every server feature with a real job — CRUD tools, sampling, multi-round elicitation, subscriptions, progress | stdio + http | dual | +| [`caching/`](./caching/README.md) | `cacheHints` stamping on cacheable results (2026-07-28) | stdio + http | modern | +| [`gateway/`](./gateway/README.md) | `connect({ prior })` — probe once, zero-round-trip connect for every worker (gateway pattern) | http | modern | +| [`custom-methods/`](./custom-methods/README.md) | Vendor-prefixed methods + custom notifications | stdio + http | dual | +| [`schema-validators/`](./schema-validators/README.md) | ArkType, Valibot, Zod, and `outputSchema` | stdio + http | dual | +| [`custom-version/`](./custom-version/README.md) | `supportedProtocolVersions` / version negotiation | stdio + http | legacy | +| [`parallel-calls/`](./parallel-calls/README.md) | Multiple clients / parallel tool calls, per-client notifications | stdio + http | dual | +| 
[`legacy-routing/`](./legacy-routing/README.md) | `isLegacyRequest` in front of an existing sessionful 1.x deployment + a strict modern entry on one port | http | dual (in-body) | +| [`bearer-auth/`](./bearer-auth/README.md) | Resource server with bearer token; `401` + `WWW-Authenticate` | http | dual | +| [`oauth/`](./oauth/README.md) | OAuth `authorization_code`: in-repo AS (auto-consent) + headless redirect-following client | http | dual | +| [`oauth-client-credentials/`](./oauth-client-credentials/README.md) | OAuth `client_credentials` (machine-to-machine): in-repo AS + `ClientCredentialsProvider` | http | dual | +| [`scoped-tools/`](./scoped-tools/README.md) | Per-tool scope on `createMcpHandler` — bearer-verify gate + handler-level `ctx.http?.authInfo` checks | http | modern | ## HTTP hosting variants diff --git a/examples/cli-client/.gitignore b/examples/cli-client/.gitignore new file mode 100644 index 0000000000..d344ba6b06 --- /dev/null +++ b/examples/cli-client/.gitignore @@ -0,0 +1 @@ +config.json diff --git a/examples/cli-client/README.md b/examples/cli-client/README.md new file mode 100644 index 0000000000..2280b01c2a --- /dev/null +++ b/examples/cli-client/README.md @@ -0,0 +1,145 @@ +# cli-client — the reference MCP host + +An interactive, LLM-connected chat CLI with **no built-in tools**: everything the model can do comes from the MCP servers you connect it to. It is a minimal but complete host — every client-side MCP feature is wired the way a host application should wire it — and it is the example the [host-integration guide](../../docs/host-integration.md) walks through, file by file. + +Its standard workload is [`examples/todos-server`](../todos-server/README.md), the reference server, but it connects to **any** MCP server: a URL, a command line, or an `mcpServers`-style config file. 
+ +## Quick start (no API key) + +From the repo root (first time: `pnpm install && pnpm build:all`): + +```bash +pnpm --filter @mcp-examples/cli-client start +``` + +That spawns todos-server over stdio and answers with the keyless `scripted` provider — enough to see the wiring move. For a real conversation, add a provider key (next section), then say `hi`: the model offers a guided tour that walks through every feature. + +## Providers + +The model sits behind one small interface (`providers/provider.ts`); each file in `providers/` is a complete, copyable mapping for one vendor. Pick one explicitly with `--provider`, or let the CLI auto-pick from the environment (checked in this order): + +| Provider | Enable with | Default model | Pin a model | +| ----------- | -------------------------------------------------------------- | ------------------------------------------- | ----------------------------------- | +| `anthropic` | `ANTHROPIC_API_KEY` (or an OAuth-style `ANTHROPIC_AUTH_TOKEN`) | newest Sonnet, resolved from the models API | `--model <id>` or `ANTHROPIC_MODEL` | +| `openai` | `OPENAI_API_KEY` | newest mainline GPT (non-pro) | `--model <id>` or `OPENAI_MODEL` | +| `gemini` | `GEMINI_API_KEY` | newest stable Flash | `--model <id>` or `GEMINI_MODEL` | +| `scripted` | nothing — the keyless default | n/a (replays canned turns) | n/a | + +```bash +ANTHROPIC_API_KEY=sk-… pnpm --filter @mcp-examples/cli-client start -- --provider anthropic +OPENAI_API_KEY=sk-… pnpm --filter @mcp-examples/cli-client start -- --provider openai +GEMINI_API_KEY=… pnpm --filter @mcp-examples/cli-client start -- --provider gemini + +# pin an exact model instead of the resolved latest +ANTHROPIC_API_KEY=sk-… pnpm --filter @mcp-examples/cli-client start -- --provider anthropic --model claude-sonnet-4-5 +``` + +Model ids are deliberately not hardcoded: unless pinned, each provider asks its own models API for the newest mid-tier model, so the example keeps working as vendors ship new ones.
The `scripted` provider replays a fixed conversation — it is what CI uses (see [testing](#how-this-example-is-tested)), and what you get when no key is set. + +## Pair it with todos-server (two terminals) + +The full demo is the reference pair talking over HTTP: + +```bash +# Terminal A — serve the reference server over Streamable HTTP (port 3000) +pnpm --filter @mcp-examples/todos-server start:http + +# Terminal B — connect the host to it (add a provider key for a real model) +ANTHROPIC_API_KEY=sk-… pnpm --filter @mcp-examples/cli-client start -- --server http://127.0.0.1:3000/mcp --provider anthropic +``` + +The status line shows what was negotiated — `connected to "todos" (2026-07-28, 8 tools, 2 resources, 2 prompts)`. Add `--legacy` in terminal B to force the 2025-era handshake against the same server and watch the legacy arms of every feature run instead (`connected to "todos" (2025-11-25, …)`). + +A tour that touches everything, in one sitting: + +```text +brainstorm some tasks ← elicitation form (theme + how many) + approval-gated sampling +prioritize my open tasks ← sampling: you approve the request before it runs +/todos:plan-my-day focus=ops ← an MCP prompt as a slash command (tab-completes) +@todos:todos://board what's next? ← attach a resource as context +/watch @todos:todos://board ← subscribe: a note appears whenever the board changes +do all my tasks ← per-task progress + log notifications stream live +(Ctrl-C mid-run) ← cancellation: the tool stops early, the model is told +clear my completed tasks ← elicitation-confirmed bulk delete +/help /servers /tools /resources /prompts /roots +``` + +Tab completes slash commands, prompt names, `@server:uri` mentions, and prompt argument values (the latter through MCP `completion/complete`). 
+ +## Every feature, and where to see it + +| MCP feature | Where you see it | What the host does | +| ------------------ | ------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Tools | just chat | aggregates every server's tools under `mcp__<server>__<tool>`, hands them to the model, executes the calls it makes, feeds results (including `isError`) back, repeats | +| Resources | `@todos:todos://board` in a message, `/resources` | `resources/read` → injected as a provenance-labelled context block; `list_changed` keeps the cached list fresh | +| Subscriptions | `/watch @todos:todos://board` | `resources/subscribe` on 2025-era connections, a `subscriptions/listen` resource filter on 2026-07-28; updates render as notes as the board changes | +| Prompts | `/todos:plan-my-day focus=ops`, `/prompts` | `prompts/get` seeds the conversation with the returned messages, keeping their roles | +| Completions | tab on prompt arguments | `completion/complete` against the server's `completable()` argument values | +| Sampling | the `prioritize` / `brainstorm_tasks` tools | the server borrows the host's model: the request is shown to you for approval, then routed through the same `LLMProvider` that drives the chat | +| Elicitation | `clear_done`, `brainstorm_tasks` | a terminal form generated from the requested schema; accept / decline / cancel are all honoured | +| Roots | `--root`, `/roots`, `/root add <path>` | workspace roots served via `roots/list`; on change, `roots/list_changed` on 2025-era connections (2026-07-28 removed the notification — servers re-request roots) | +| Logging & progress | the status lines, "do all my tasks" | `notifications/message` and per-call progress rendered as the work happens | +| Cancellation | Ctrl-C while a tool call is running | the host aborts the call's `RequestOptions.signal` (the SDK sends 
`notifications/cancelled`); todos-server checks `ctx.mcpReq.signal` and stops early | +| Auth | HTTP servers in your config | static headers from the config, or a full browser OAuth flow when a server answers 401 | + +## Connect it to your own servers + +For a one-off connection, skip the config file and pass the server directly: + +```sh +pnpm --filter @mcp-examples/cli-client start -- --server https://your-server.example.com/mcp +``` + +`--server` (repeatable) connects to exactly the targets you list: http(s) URLs over Streamable HTTP — the OAuth flow starts automatically if the server answers 401 — and anything else is spawned as a stdio command line. + +For a persistent setup, copy `config.example.json` to `config.json` (or pass `--config <path>`) and list any MCP servers — the same shape most hosts read: + +```jsonc +{ + "mcpServers": { + "todos": { "command": "npx", "args": ["-y", "tsx", "/absolute/path/to/examples/todos-server/server.ts"] }, + "docs": { "url": "https://example.com/mcp" }, + "internal": { "url": "https://mcp.internal.example.com/mcp", "headers": { "Authorization": "Bearer ${INTERNAL_TOKEN}" } } + } +} +``` + +- `command`/`args` entries are spawned as child processes (stdio). They get a minimal environment plus whatever the entry's `env` lists — never the host's full environment. Relative paths resolve from wherever you run the CLI, so prefer absolute paths when in doubt. +- `url` entries connect over Streamable HTTP. `${VAR}` in `url`/`headers`/`env` values is read from the host's environment, so secrets stay out of the file. +- An HTTP server without configured headers that answers 401 triggers the OAuth flow: cli-client asks before opening your browser, runs authorization-code + PKCE against the server's authorization server, and verifies the callback `state`. Tokens live in memory for the session. (Try it against the [`oauth/`](../oauth/README.md) example server; `--callback-port <port>` pins the loopback callback port when you need to forward it over SSH.) 
+ +## All flags + +```text +--server connect to just this server: an http(s) URL (OAuth on demand) or a stdio command line (repeatable) +--config mcpServers config file (default: ./config.json, falling back to spawning todos-server) +--provider scripted | anthropic | openai | gemini (default: first one with a key in the env, else scripted) +--model pin a model id (default: the provider's latest mid-tier model) +--root workspace root exposed to servers via roots/list (repeatable; default: cwd) +--callback-port fixed loopback port for the OAuth callback (default: a free port) +--legacy use the 2025 initialize handshake instead of probing for 2026-07-28 +-h, --help show usage +``` + +## How this example is tested + +`client.ts` is the CI entry: it replays a scripted conversation (`script/session.ts`) against todos-server with the `ScriptedProvider`, asserting at each step that the loop, namespacing, resource attachment, prompt-role handling, sampling approval, the multi-round elicitation + signed-`requestState` flow, completions, cancellation, progress, and logging actually round-tripped — over stdio and Streamable HTTP, on both protocol eras (the progress/logging/subscription assertions run on the stdio legs, where delivery timing is deterministic). `pnpm run:examples` runs it in CI; `pnpm --filter @mcp-examples/cli-client test` runs the unit tests for the provider mappings, routing, config parsing, form handling, and the OAuth helpers. + +On the legacy-era HTTP leg the sampling/elicitation steps are skipped: push-style server→client requests need a session, and todos-server runs `createMcpHandler`'s default stateless posture there (see [`sampling/`](../sampling/README.md) for the same caveat). 
+ +## Layout + +```text +cli.ts interactive entry (readline chat) +client.ts CI entry (scripted conversation, self-verifying) +server.ts thin shim that runs ../todos-server/server.ts (so the example runner can spawn the pair) +host/ the host itself: connections, tool routing, resources, prompts, + sampling/elicitation/roots handlers, OAuth, config, terminal UI +providers/ the LLMProvider seam + one complete mapping per provider +script/ the scripted conversation CI replays +test/ unit tests +``` + +Unlike the single-feature stories, the SDK `Client`/transport construction here lives in `host/host.ts` rather than inline in the entry files — the host wiring is what this example documents, and the [host-integration guide](../../docs/host-integration.md) walks through it file by file. + +Not goals of this example: it is not an agent framework (no plugins, sub-agents, or planning), there is no streaming output, no conversation persistence, and the providers make exactly one `generate()` call per turn. diff --git a/examples/cli-client/cli.ts b/examples/cli-client/cli.ts new file mode 100644 index 0000000000..84b13df8ab --- /dev/null +++ b/examples/cli-client/cli.ts @@ -0,0 +1,154 @@ +#!/usr/bin/env node +/** + * The interactive entry point: a chat REPL with no built-in tools — everything comes from the + * MCP servers in your config. 
Run it from the repo root: + * + * pnpm --filter @mcp-examples/cli-client start # sibling todos-server, scripted provider + * ANTHROPIC_API_KEY=… pnpm --filter @mcp-examples/cli-client start -- --provider anthropic + * pnpm --filter @mcp-examples/cli-client start -- --config ./config.json --provider openai + * pnpm --filter @mcp-examples/cli-client start -- --server https://mcp.linear.app/mcp # one ad-hoc server, OAuth if needed + */ +import { existsSync } from 'node:fs'; +import { createInterface } from 'node:readline/promises'; +import { parseArgs } from 'node:util'; + +import type { CliClientConfig } from './host/config'; +import { configFromTargets, readConfigFile, todosServerConfig } from './host/config'; +import { McpHost } from './host/host'; +import { createSession, handleUserInput } from './host/loop'; +import { createCompleter, ReadlineUI } from './host/ui'; +import { AnthropicProvider } from './providers/anthropic'; +import { GeminiProvider } from './providers/gemini'; +import { OpenAIProvider } from './providers/openai'; +import type { LLMProvider } from './providers/provider'; +import { ScriptedProvider } from './providers/scripted'; + +const USAGE = `usage: tsx cli.ts [options] + --server connect to just this server: an http(s) URL (OAuth on demand) or a stdio command line (repeatable) + --config mcpServers config file (default: ./config.json, falling back to spawning the sibling todos-server) + --provider scripted | anthropic | openai | gemini (default: first one with an API key in the env, else scripted) + --model pin a model id (default: the provider's latest mid-tier model) + --root workspace root exposed to servers via roots/list (repeatable; default: cwd) + --callback-port fixed loopback port for the OAuth callback (default: a free port; set this when port-forwarding over SSH) + --legacy use the 2025 initialize handshake instead of probing for 2026-07-28 + --help this help`; + +function pickProvider(name: string | undefined, model: string | 
undefined): LLMProvider { + const chosen = + name ?? + (process.env.ANTHROPIC_API_KEY || process.env.ANTHROPIC_AUTH_TOKEN + ? 'anthropic' + : process.env.OPENAI_API_KEY + ? 'openai' + : process.env.GEMINI_API_KEY + ? 'gemini' + : 'scripted'); + switch (chosen) { + case 'anthropic': { + return new AnthropicProvider(model); + } + case 'openai': { + return new OpenAIProvider(model); + } + case 'gemini': { + return new GeminiProvider(model); + } + case 'scripted': { + return new ScriptedProvider(); + } + default: { + throw new Error(`Unknown provider "${chosen}" (expected scripted | anthropic | openai | gemini)`); + } + } +} + +const { values } = parseArgs({ + // `pnpm … start -- --provider anthropic` forwards the literal `--`; drop it so only flags remain. + args: process.argv.slice(2).filter(argument => argument !== '--'), + options: { + server: { type: 'string', multiple: true }, + config: { type: 'string' }, + provider: { type: 'string' }, + model: { type: 'string' }, + root: { type: 'string', multiple: true }, + 'callback-port': { type: 'string' }, + legacy: { type: 'boolean' }, + help: { type: 'boolean', short: 'h' } + } +}); + +if (values.help) { + console.log(USAGE); + process.exit(0); +} + +// Tab completion needs the host's cached lists, but the host needs the UI — resolve lazily. 
+const hostRef: { current?: McpHost } = {}; +const ui = new ReadlineUI( + createInterface({ input: process.stdin, output: process.stdout, completer: createCompleter(() => hostRef.current) }) +); +const provider = pickProvider(values.provider, values.model); + +let config: CliClientConfig; +let configSource: string; +if (values.server && values.server.length > 0) { + configSource = '--server arguments'; + config = configFromTargets(values.server); +} else if (values.config) { + configSource = values.config; + config = await readConfigFile(values.config); +} else if (existsSync('./config.json')) { + configSource = './config.json'; + config = await readConfigFile('./config.json'); +} else { + configSource = 'sibling todos-server (no config.json found — see config.example.json)'; + config = todosServerConfig(); +} + +// Show exactly what we are about to connect to before doing it. +ui.status(`config: ${configSource}`); +for (const [serverName, entry] of Object.entries(config.mcpServers)) { + ui.status(` ${serverName} → ${'url' in entry ? entry.url : [entry.command, ...(entry.args ?? [])].join(' ')}`); +} + +const host = new McpHost({ + ui, + provider, + roots: values.root ?? [process.cwd()], + legacy: values.legacy ?? false, + oauthCallbackPort: values['callback-port'] ? Number.parseInt(values['callback-port'], 10) : undefined +}); +hostRef.current = host; +try { + await host.connect(config); +} catch (error) { + ui.print(error instanceof Error ? 
error.message : String(error)); + ui.close(); + process.exit(1); +} + +if (provider.name === 'scripted') { + ui.status( + 'provider: scripted (no API key found — replies are canned; set ANTHROPIC_API_KEY / OPENAI_API_KEY / GEMINI_API_KEY or pass --provider)' + ); +} else { + ui.status(`provider: ${provider.name}`); +} +ui.print('cli-client ready — say hi for a tour, /help for commands, /quit to exit.'); + +const chat = createSession(host, provider, ui); +try { + for (;;) { + const input = await ui.readUserInput(); + try { + const result = await handleUserInput(chat, input); + if (result === 'exit') break; + } catch (error) { + // A provider hiccup or a server error should cost one turn, not the whole session. + ui.status(`error: ${error instanceof Error ? error.message : String(error)}`); + } + } +} finally { + await host.close(); + ui.close(); +} diff --git a/examples/cli-client/client.ts b/examples/cli-client/client.ts new file mode 100644 index 0000000000..30772d23a9 --- /dev/null +++ b/examples/cli-client/client.ts @@ -0,0 +1,48 @@ +/** + * The CI entry point: replays the scripted conversation in script/session.ts against the + * sibling todos-server with the keyless ScriptedProvider, asserting at every step. This is + * what `pnpm run:examples` executes over the stdio and Streamable HTTP legs; run it yourself + * with `pnpm --filter @mcp-examples/cli-client client`. The interactive entry for humans is + * cli.ts (`pnpm --filter @mcp-examples/cli-client start`). 
+ */ +import { parseExampleArgs, siblingPath } from '@mcp-examples/shared'; + +import type { CliClientConfig } from './host/config'; +import { McpHost } from './host/host'; +import { createSession, handleUserInput } from './host/loop'; +import { ScriptedProvider } from './providers/scripted'; +import { ScriptedUI } from './script/scriptedUi'; +import { buildScriptedSession } from './script/session'; + +const { transport, url, era } = parseExampleArgs(); + +// Push-style server→client requests (2025-era sampling/elicitation) have no return path on a +// stateless legacy HTTP deployment, so that leg skips the prioritize/clear_done steps and +// still exercises tools, resources-as-context, and prompts. +const interactive = !(era === 'legacy' && transport === 'http'); + +const session = buildScriptedSession({ interactive }); +const ui = new ScriptedUI({ confirmAnswers: session.confirmAnswers, askAnswers: session.askAnswers }); +const provider = new ScriptedProvider(session.turns); +const host = new McpHost({ ui, provider, roots: [process.cwd()], legacy: era === 'legacy' }); + +const config: CliClientConfig = + transport === 'stdio' + ? { mcpServers: { todos: { command: 'npx', args: ['-y', 'tsx', siblingPath(import.meta.url, '../todos-server/server.ts')] } } } + : { mcpServers: { todos: { url } } }; + +await host.connect(config); + +const chat = createSession(host, provider, ui); +for (const [index, input] of session.inputs.entries()) { + session.beforeInput?.[index]?.(ui); + await handleUserInput(chat, input); +} + +// Give debounced list-change refreshes (the SDK coalesces them for ~300 ms) a moment to land. 
+await new Promise(resolve => setTimeout(resolve, 750)); + +await session.verify({ ui, provider, host, era, transport }); +await host.close(); + +console.log('cli-client e2e: all checks passed'); diff --git a/examples/cli-client/config.example.json b/examples/cli-client/config.example.json new file mode 100644 index 0000000000..a8970a9126 --- /dev/null +++ b/examples/cli-client/config.example.json @@ -0,0 +1,8 @@ +{ + "mcpServers": { + "todos": { + "command": "npx", + "args": ["-y", "tsx", "../todos-server/server.ts"] + } + } +} diff --git a/examples/cli-client/host/auth.ts b/examples/cli-client/host/auth.ts new file mode 100644 index 0000000000..d8e081ecf1 --- /dev/null +++ b/examples/cli-client/host/auth.ts @@ -0,0 +1,200 @@ +import { createServer } from 'node:http'; +import type { AddressInfo } from 'node:net'; + +import type { + OAuthClientInformationMixed, + OAuthClientMetadata, + OAuthClientProvider, + OAuthDiscoveryState, + OAuthTokens +} from '@modelcontextprotocol/client'; +import open from 'open'; + +import type { HostUI } from './ui'; + +/** + * In-memory OAuth provider for an interactive CLI host. Tokens live for the lifetime of the + * process; a real host would persist them in the platform keychain. The SDK drives the whole + * authorization-code + PKCE flow — the host only supplies storage, the redirect hook, and the + * `state` value it must verify when the callback comes back. + */ +export class CliOAuthClientProvider implements OAuthClientProvider { + private clientInfo?: OAuthClientInformationMixed; + private oauthTokens?: OAuthTokens; + private verifier?: string; + private discovery?: OAuthDiscoveryState; + private currentState?: string; + /** The authorization URL the SDK asked us to open (deferred until the user approves). 
*/ + pendingAuthorizationUrl?: URL; + + constructor( + readonly redirectUrl: string, + readonly clientMetadata: OAuthClientMetadata + ) {} + + state(): string { + this.currentState ??= crypto.randomUUID(); + return this.currentState; + } + + /** The SDK never checks `state` itself — the host must compare this against the callback. */ + get expectedState(): string | undefined { + return this.currentState; + } + + clientInformation(): OAuthClientInformationMixed | undefined { + return this.clientInfo; + } + + saveClientInformation(clientInformation: OAuthClientInformationMixed): void { + this.clientInfo = clientInformation; + } + + tokens(): OAuthTokens | undefined { + return this.oauthTokens; + } + + saveTokens(tokens: OAuthTokens): void { + this.oauthTokens = tokens; + } + + redirectToAuthorization(authorizationUrl: URL): void { + // connect() is already in flight here, so just remember the URL; the host opens the + // browser only after the user has agreed to authorize this server. + this.pendingAuthorizationUrl = authorizationUrl; + } + + saveCodeVerifier(codeVerifier: string): void { + this.verifier = codeVerifier; + } + + codeVerifier(): string { + if (!this.verifier) throw new Error('No code verifier saved'); + return this.verifier; + } + + saveDiscoveryState(state: OAuthDiscoveryState): void { + this.discovery = state; + } + + discoveryState(): OAuthDiscoveryState | undefined { + return this.discovery; + } + + invalidateCredentials(scope: 'all' | 'client' | 'tokens' | 'verifier' | 'discovery'): void { + if (scope === 'all' || scope === 'client') this.clientInfo = undefined; + if (scope === 'all' || scope === 'tokens') this.oauthTokens = undefined; + if (scope === 'all' || scope === 'verifier') this.verifier = undefined; + if (scope === 'all' || scope === 'discovery') this.discovery = undefined; + } +} + +export function createOAuthProvider(serverName: string, callbackPort: number): CliOAuthClientProvider { + const callbackUrl = 
`http://127.0.0.1:${callbackPort}/callback`; + return new CliOAuthClientProvider(callbackUrl, { + client_name: `cli-client (${serverName})`, + redirect_uris: [callbackUrl], + grant_types: ['authorization_code', 'refresh_token'], + response_types: ['code'], + application_type: 'native', + token_endpoint_auth_method: 'none' + }); +} + +/** Start a loopback HTTP server on 127.0.0.1 and resolve with the OAuth callback's query parameters. */ +export function waitForOAuthCallback(port: number): Promise<URLSearchParams> { + return new Promise((resolve, reject) => { + const server = createServer((req, res) => { + const requestUrl = new URL(req.url ?? '/', 'http://localhost'); + if (requestUrl.pathname !== '/callback') { + res.writeHead(404).end(); + return; + } + res.writeHead(200, { 'Content-Type': 'text/html' }); + res.end('<h1>Authorization received</h1><p>You can close this window and return to cli-client.</p>'); + resolve(requestUrl.searchParams); + setTimeout(() => server.close(), 1000); + }); + server.on('error', reject); + server.listen(port, '127.0.0.1'); + }); +} + +/** A free loopback port for the OAuth callback, picked by the OS. */ +export async function findCallbackPort(): Promise<number> { + return new Promise((resolve, reject) => { + const probe = createServer(); + probe.on('error', reject); + probe.listen(0, '127.0.0.1', () => { + const { port } = probe.address() as AddressInfo; + probe.close(() => resolve(port)); + }); + }); +} + +function isLoopbackHost(hostname: string): boolean { + return hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '[::1]' || hostname === '::1'; +} + +/** + * Complete an interactive OAuth flow after `connect()` failed with `UnauthorizedError`: + * confirm with the user, open the system browser, wait for the loopback callback, verify + * `state`, and let the transport exchange the code (`finishAuth`). The caller then reconnects + * on a fresh transport with the same provider. 
+ */ +export async function completeAuthorizationWithBrowser(options: { + serverName: string; + ui: HostUI; + provider: CliOAuthClientProvider; + callbackPort: number; + finishAuth: (callbackParams: URLSearchParams) => Promise<void>; + /** Overridable so tests (or hosts with their own browser handling) don't shell out. */ + openUrl?: (url: string) => Promise<void>; +}): Promise<boolean> { + const { serverName, ui, provider, callbackPort, finishAuth } = options; + const openUrl = options.openUrl ?? (async (url: string) => void (await open(url))); + const authorizationUrl = provider.pendingAuthorizationUrl; + if (!authorizationUrl) return false; + // The authorization endpoint comes from server-controlled discovery metadata — never hand + // a non-https (or non-loopback) URL to the browser, and show the user where they're going. 
+ if (authorizationUrl.protocol !== 'https:' && !(authorizationUrl.protocol === 'http:' && isLoopbackHost(authorizationUrl.hostname))) { + ui.status(`skipping "${serverName}" — refusing to open non-https authorization URL (${authorizationUrl.origin})`); + return false; + } + ui.attention(`[authorization]\nServer "${serverName}" requires authorization at ${authorizationUrl.origin}.`); + const approved = await ui.confirm('Open your browser to sign in?'); + if (!approved) { + ui.status(`skipping "${serverName}" — authorization declined`); + return false; + } + const callback = waitForOAuthCallback(callbackPort); + // Attach a handler immediately so a listen failure can't become an unhandled rejection + // while the browser-open is still in flight. + callback.catch(() => {}); + ui.status(`opening ${authorizationUrl.toString()}`); + try { + await openUrl(authorizationUrl.toString()); + } catch { + ui.print(`Could not open a browser automatically. Please open:\n${authorizationUrl.toString()}`); + } + let params: URLSearchParams; + try { + params = await callback; + } catch (error) { + ui.status(`authorization for "${serverName}" failed: ${error instanceof Error ? error.message : String(error)}`); + return false; + } + if (params.get('error')) { + // Do not echo error_description — it is attacker-controllable in mix-up attacks. + ui.status(`authorization for "${serverName}" failed`); + return false; + } + // Fail closed: no recorded state (or a mismatch) means the callback cannot be trusted. 
+ const expectedState = provider.expectedState; + if (!expectedState || params.get('state') !== expectedState) { + ui.status(`authorization for "${serverName}" rejected: state mismatch`); + return false; + } + await finishAuth(params); + return true; +} diff --git a/examples/cli-client/host/config.ts b/examples/cli-client/host/config.ts new file mode 100644 index 0000000000..05c0f15289 --- /dev/null +++ b/examples/cli-client/host/config.ts @@ -0,0 +1,115 @@ +import { readFile } from 'node:fs/promises'; + +import { siblingPath } from '@mcp-examples/shared'; +import * as z from 'zod/v4'; + +/** + * The standard `mcpServers` config shape (the same one Claude Desktop, Claude Code, and most + * other hosts read): stdio servers are spawned from `command`/`args`, remote servers are + * reached via `url`. Anything you list here is code/infrastructure you trust — adding a + * server means trusting it with whatever the model sends it. + */ +const stdioServerSchema = z.object({ + command: z.string(), + args: z.array(z.string()).optional(), + /** Extra environment for the spawned server. Children do NOT inherit the host's env. */ + env: z.record(z.string(), z.string()).optional(), + cwd: z.string().optional() +}); + +const httpServerSchema = z.object({ + url: z.string(), + /** Static headers (e.g. `Authorization: Bearer ${MY_TOKEN}`); `${VAR}` reads from the host env. 
/**
 * Replace `${VAR}` references with values from the environment, so secrets live in env vars
 * rather than in the config file. Unknown variables resolve to ''.
 *
 * Only names of the form `[A-Za-z_][A-Za-z0-9_]*` are recognised; anything else (`$HOME`,
 * `${1BAD}`, `${}`) is left untouched.
 *
 * @param value - Text that may contain `${VAR}` references.
 * @param env - Lookup table for variable values; defaults to `process.env`.
 * @returns `value` with every recognised reference substituted.
 */
export function interpolateEnv(value: string, env: Record<string, string | undefined> = process.env): string {
    return value.replace(/\$\{([A-Za-z_][A-Za-z0-9_]*)\}/g, (_reference, name: string) => {
        // Own properties only: `${toString}` or `${constructor}` must not resolve to a method
        // inherited from Object.prototype and end up stringified into a header or URL.
        const resolved = Object.prototype.hasOwnProperty.call(env, name) ? env[name] : undefined;
        return resolved ?? '';
    });
}
/** Derive a friendly server name from an ad-hoc `--server` URL (mcp.linear.app → "linear"). */
function serverNameFromUrl(url: URL): string {
    const generic = new Set(['mcp', 'www', 'api', 'app', 'dev', 'com', 'io', 'net', 'org', 'ai', 'run', 'co']);
    // First hostname label that is not a generic prefix/suffix; the whole hostname if all are.
    const meaningful = url.hostname.split('.').find(label => !generic.has(label));
    return meaningful ?? url.hostname;
}

/** Derive a friendly server name from an ad-hoc `--server` command line ("npx -y tsx server.ts" → "server"). */
function serverNameFromCommand(tokens: string[]): string {
    const last = tokens[tokens.length - 1] ?? 'server';
    // Keep the final path segment (either separator), then drop one alphabetic extension.
    const base = last.split(/[/\\]/).pop() ?? last;
    return base.replace(/\.[A-Za-z]+$/, '') || 'server';
}

/**
 * Build a config from ad-hoc `--server` arguments: http(s) URLs become Streamable HTTP entries
 * (the OAuth flow starts on demand if the server answers 401), anything else is treated as a
 * stdio command line to spawn.
 *
 * Derived names are kept unique: a clash is resolved by appending `_<n>`, starting at the
 * target's 1-based position and counting up until the name is free, so no target can
 * overwrite an earlier one.
 *
 * @param targets - Raw `--server` values, in the order given.
 * @returns A config with one `mcpServers` entry per target.
 * @throws Error when a target is blank or when `targets` is empty.
 */
export function configFromTargets(targets: string[]): CliClientConfig {
    const mcpServers: Record<string, ServerConfig> = {};
    // Tracked separately from the object so inherited keys ("constructor") don't look taken.
    const taken = new Set<string>();
    const claim = (name: string, index: number): string => {
        let candidate = name;
        for (let suffix = index + 1; taken.has(candidate); suffix++) {
            candidate = `${name}_${suffix}`;
        }
        taken.add(candidate);
        return candidate;
    };
    for (const [index, target] of targets.entries()) {
        if (/^https?:\/\//i.test(target)) {
            mcpServers[claim(serverNameFromUrl(new URL(target)), index)] = { url: target };
        } else {
            const [command, ...args] = target.split(/\s+/).filter(token => token.length > 0);
            if (!command) throw new Error('--server got an empty target');
            mcpServers[claim(serverNameFromCommand([command, ...args]), index)] = { command, args };
        }
    }
    if (Object.keys(mcpServers).length === 0) throw new Error('--server needs at least one URL or command line');
    return { mcpServers };
}
/**
 * Cap `text` at `limit` UTF-16 code units, appending a note that says how much was dropped.
 *
 * Text at or under the limit is returned unchanged. When the cut would land between the two
 * halves of a surrogate pair, the cut moves back one unit so the result never ends in a lone
 * high surrogate.
 *
 * @param text - Server-provided text about to be injected.
 * @param limit - Maximum number of characters to keep; defaults to `MAX_INJECTED_CHARS`.
 * @returns `text` itself, or its kept prefix followed by a `[truncated …]` line.
 */
export function truncate(text: string, limit: number = MAX_INJECTED_CHARS): string {
    if (text.length <= limit) return text;
    let cut = limit;
    if (cut > 0) {
        const lastKept = text.charCodeAt(cut - 1);
        // 0xD800–0xDBFF is the high-surrogate range; its partner would sit just past the cut.
        if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
    }
    return `${text.slice(0, cut)}\n[truncated ${text.length - cut} characters — cli-client caps injected content at ${limit} characters]`;
}
/**
 * Render a read resource as a context block for the conversation, with explicit provenance
 * (which server, which URI) and an instruction not to re-fetch — so the model can cite where
 * the content came from and does not burn a tool round re-reading it.
 *
 * Text items are included verbatim (subject to `truncate`); binary items are reduced to a
 * placeholder giving their MIME type and decoded size.
 *
 * @param serverName - Configured name of the server the resource was read from.
 * @param uri - URI that was read.
 * @param result - The `resources/read` result to render.
 * @returns One newline-joined block: opening label, content, closing label, instruction.
 */
export function resourceToContextText(serverName: string, uri: string, result: ReadResourceResult): string {
    const rendered = result.contents
        .map(item => {
            if ('text' in item) return item.text;
            // Base64 carries 3 bytes per 4 characters; trailing '=' padding encodes no data.
            const padding = item.blob.endsWith('==') ? 2 : item.blob.endsWith('=') ? 1 : 0;
            const bytes = Math.max(0, Math.floor((item.blob.length * 3) / 4) - padding);
            return `[binary content ${item.mimeType ?? 'unknown type'}, ${bytes} bytes]`;
        })
        .join('\n');
    return [
        // JSON.stringify quotes and escapes both values so neither can break out of the label.
        `<mcp-resource server=${JSON.stringify(serverName)} uri=${JSON.stringify(uri)}>`,
        truncate(rendered),
        '</mcp-resource>',
        'The user attached this MCP resource as context. Use it to answer; do not re-read it unless told it changed.'
    ].join('\n');
}
/**
 * Flatten content parts to a single line for terminal status output (this text is shown to
 * the user, not sent to the model): newlines become spaces, terminal escapes are stripped,
 * and surrounding whitespace is trimmed.
 */
export function partsToDisplayText(parts: ContentPart[]): string {
    const singleLine = partsToText(parts).replaceAll('\n', ' ');
    const withoutEscapes = stripAnsi(singleLine);
    return withoutEscapes.trim();
}
"2026-07-28"). */ + protocolVersion: string; + httpTransport?: StreamableHTTPClientTransport; + instructions?: string; + tools: Tool[]; + resources: Resource[]; + resourceTemplates: ResourceTemplateType[]; + prompts: Prompt[]; +} + +export interface McpHostOptions { + ui: HostUI; + /** The same provider that drives the chat loop also answers sampling requests. */ + provider: LLMProvider; + /** Workspace roots exposed to servers via `roots/list` (absolute or cwd-relative paths). */ + roots?: string[]; + /** Use the 2025 `initialize` handshake instead of probing for 2026-07-28. */ + legacy?: boolean; + /** Fixed loopback port for the OAuth callback (default: an OS-assigned free port). Useful over SSH port-forwarding. */ + oauthCallbackPort?: number; +} + +function unwrapUnauthorized(error: unknown): UnauthorizedError | undefined { + if (error instanceof UnauthorizedError) return error; + // Under versionNegotiation 'auto', a connect-time 401 surfaces as + // SdkError(EraNegotiationFailed) with the UnauthorizedError in error.data.cause. + if (error instanceof SdkError && isRecord(error.data) && error.data.cause instanceof UnauthorizedError) { + return error.data.cause; + } + return undefined; +} + +function samplingContentToParts(content: CreateMessageRequest['params']['messages'][number]['content']): ContentPart[] { + const blocks = Array.isArray(content) ? 
content : [content]; + const parts: ContentPart[] = []; + for (const block of blocks) { + if (block.type === 'text') parts.push({ type: 'text', text: block.text }); + else if (block.type === 'image') parts.push({ type: 'image', mimeType: block.mimeType, data: block.data }); + else parts.push({ type: 'text', text: `[${block.type} content]` }); + } + return parts; +} + +/** + * One MCP client per configured server, plus everything a host owes the servers it connects + * to: tool aggregation and routing, resources as context, prompts, and the handlers for + * server-initiated requests (sampling, elicitation, roots), logging, and progress. + */ +export class McpHost { + private readonly ui: HostUI; + private readonly provider: LLMProvider; + private readonly legacy: boolean; + private roots: string[]; + private readonly watches: McpSubscription[] = []; + private readonly oauthCallbackPort?: number; + readonly servers = new Map(); + + constructor(options: McpHostOptions) { + this.ui = options.ui; + this.provider = options.provider; + this.legacy = options.legacy ?? false; + this.oauthCallbackPort = options.oauthCallbackPort; + this.roots = (options.roots ?? [process.cwd()]).map(root => path.resolve(root)); + } + + async connect(config: CliClientConfig): Promise { + for (const [name, entry] of Object.entries(config.mcpServers)) { + try { + const server = await this.connectServer(name, entry); + if (!server) continue; + // Sanitized keys can collide ("my server" vs "my_server") — keep them unique so + // namespaced tool calls always route to exactly one server. 
+ const usedKeys = new Set([...this.servers.values()].map(existing => existing.key)); + for (let suffix = 2; usedKeys.has(server.key); suffix++) { + server.key = `${sanitizeServerName(name)}_${suffix}`; + } + this.servers.set(name, server); + this.ui.status( + `connected to "${name}" (${server.protocolVersion}, ${server.tools.length} tools, ${server.resources.length + server.resourceTemplates.length} resources, ${server.prompts.length} prompts)` + ); + } catch (error) { + this.ui.status(`failed to connect to "${name}": ${error instanceof Error ? error.message : String(error)}`); + } + } + if (this.servers.size === 0) { + throw new Error('No MCP servers connected — check the config file'); + } + } + + /** Aggregated, namespaced tool definitions for the model. */ + toolDefinitions(): ToolDefinition[] { + const definitions: ToolDefinition[] = []; + for (const server of this.servers.values()) { + for (const tool of server.tools) { + definitions.push({ + name: namespaceTool(server.key, tool.name), + description: tool.description, + inputSchema: tool.inputSchema + }); + } + } + return definitions; + } + + /** Server instructions folded into the system prompt — that is what they exist for. */ + systemInstructions(): string { + const sections = [...this.servers.values()] + .filter(server => server.instructions) + .map(server => `Instructions from the "${server.name}" server:\n${server.instructions}`); + return sections.join('\n\n'); + } + + /** Execute a model-issued tool call against the server that owns it. 
*/ + async executeToolCall(call: ToolCall, options?: { signal?: AbortSignal }): Promise<{ parts: ContentPart[]; isError: boolean }> { + const route = routeNamespacedTool( + call.name, + [...this.servers.values()].map(server => server.key) + ); + const server = route && [...this.servers.values()].find(candidate => candidate.key === route.serverKey); + if (!route || !server) { + return { parts: [{ type: 'text', text: `Unknown tool: ${call.name}` }], isError: true }; + } + try { + const result: CallToolResult = await server.client.callTool( + { + name: route.toolName, + arguments: call.arguments, + // On 2026-07-28 connections servers only emit log notifications for requests + // that opt in via this _meta key; on 2025 the setLoggingLevel call covers it. + ...(server.era === 'modern' ? { _meta: { [LOG_LEVEL_META_KEY]: 'info' } } : {}) + }, + { + // Aborting this signal cancels the call: the SDK sends notifications/cancelled + // and the server can stop work via its own request signal. + signal: options?.signal, + onprogress: progress => { + const total = progress.total === undefined ? '' : `/${progress.total}`; + this.ui.status(`${call.name}: ${progress.message ?? 'working'} (${progress.progress}${total})`); + }, + resetTimeoutOnProgress: true + } + ); + return { parts: toolResultToParts(result), isError: result.isError === true }; + } catch (error) { + if (options?.signal?.aborted) { + return { parts: [{ type: 'text', text: 'Tool call cancelled by the user.' }], isError: true }; + } + // A thrown ProtocolError/SdkError (unknown tool, timeout, lost connection) is not the + // same thing as a tool-level isError result, but the model should see both as failures. + const reason = error instanceof Error ? error.message : String(error); + return { parts: [{ type: 'text', text: `Tool call failed: ${reason}` }], isError: true }; + } + } + + /** Resolve a `server:uri` reference (the part after the `@`) to the owning server and the resource URI. 
*/ + private resolveResourceReference(reference: string): { server: ConnectedServer; uri: string } { + const separator = reference.indexOf(':'); + if (separator === -1) throw new Error(`Resource references look like @server:uri — got "@${reference}"`); + const serverName = reference.slice(0, separator); + const uri = reference.slice(separator + 1); + // Accept the configured name or its sanitized key (the form used in tool names and /commands). + const server = this.servers.get(serverName) ?? [...this.servers.values()].find(candidate => candidate.key === serverName); + if (!server) throw new Error(`Unknown server "${serverName}" in @${reference}`); + return { server, uri }; + } + + /** Resolve an `@server:uri` mention into a provenance-labelled context block. */ + async attachResource(reference: string): Promise { + const { server, uri } = this.resolveResourceReference(reference); + const result = await server.client.readResource({ uri }); + return resourceToContextText(server.name, uri, result); + } + + /** + * Watch a resource for change notifications. On 2025-era connections this is the + * `resources/subscribe` request; on 2026-07-28 connections per-resource subscriptions ride + * a `subscriptions/listen` stream instead. Updates arrive through the same + * `notifications/resources/updated` handler either way. + */ + async watchResource(reference: string): Promise { + const { server, uri } = this.resolveResourceReference(reference); + if (server.era === 'legacy') { + await server.client.subscribeResource({ uri }); + return; + } + const subscription = await server.client.listen({ resourceSubscriptions: [uri] }); + // The server acknowledges which parts of the filter it will honour — don't pretend to + // watch a resource the server will never report on. 
+ if (!subscription.honoredFilter.resourceSubscriptions?.includes(uri)) { + await subscription.close().catch(() => {}); + throw new Error(`server "${server.name}" does not support resource subscriptions`); + } + this.watches.push(subscription); + } + + listResources(): Array<{ server: string; resource: Resource }> { + return [...this.servers.values()].flatMap(server => server.resources.map(resource => ({ server: server.name, resource }))); + } + + listPrompts(): Array<{ server: string; prompt: Prompt }> { + return [...this.servers.values()].flatMap(server => server.prompts.map(prompt => ({ server: server.name, prompt }))); + } + + findPrompt(serverName: string, promptName: string): { server: ConnectedServer; prompt: Prompt } | undefined { + const server = this.servers.get(serverName) ?? [...this.servers.values()].find(candidate => candidate.key === serverName); + const prompt = server?.prompts.find(candidate => candidate.name === promptName); + return server && prompt ? { server, prompt } : undefined; + } + + /** Argument-value suggestions for a prompt via MCP `completion/complete` (powers tab completion). */ + async completePromptArgument(serverName: string, promptName: string, argumentName: string, value: string): Promise { + const server = this.servers.get(serverName); + if (!server?.client.getServerCapabilities()?.completions) return []; + try { + const result = await server.client.complete({ + ref: { type: 'ref/prompt', name: promptName }, + argument: { name: argumentName, value } + }); + return result.completion.values; + } catch { + return []; + } + } + + /** `prompts/get`, with the returned message roles preserved as separate conversation turns. 
*/ + async getPromptMessages(serverName: string, promptName: string, args: Record): Promise { + const found = this.findPrompt(serverName, promptName); + if (!found) throw new Error(`Unknown prompt ${serverName}:${promptName}`); + const result: GetPromptResult = await found.server.client.getPrompt({ name: promptName, arguments: args }); + return result.messages.map(message => ({ + role: message.role, + content: contentBlockToParts(message.content) + })); + } + + listRoots(): string[] { + return [...this.roots]; + } + + /** Add a workspace root and tell connected (legacy-era) servers the list changed. */ + async addRoot(directory: string): Promise { + this.roots.push(path.resolve(directory)); + for (const server of this.servers.values()) { + // roots/list_changed is a 2025-era notification; on 2026-07-28 connections the + // method is gone and servers re-request roots when they need them. + if (server.era === 'legacy') { + await server.client.sendRootsListChanged().catch(() => {}); + } + } + } + + async close(): Promise { + for (const watch of this.watches) { + await watch.close().catch(() => {}); + } + for (const server of this.servers.values()) { + if (server.httpTransport) { + await server.httpTransport.terminateSession().catch(() => {}); + } + await server.client.close().catch(() => {}); + } + this.servers.clear(); + } + + private buildClient(name: string): Client { + const client = new Client(CLIENT_INFO, { + versionNegotiation: { mode: this.legacy ? 'legacy' : 'auto' }, + capabilities: { + // Both elicitation modes are declared because the handler below implements both. 
+ elicitation: { form: {}, url: {} }, + sampling: {}, + roots: { listChanged: true } + }, + listChanged: { + tools: { + onChanged: (error, tools) => { + const server = this.servers.get(name); + if (error || !server || !tools) return; + server.tools = tools; + this.ui.status(`tool list changed on "${name}" (${tools.length} tools)`); + } + }, + resources: { + onChanged: (error, resources) => { + const server = this.servers.get(name); + if (error || !server || !resources) return; + server.resources = resources; + this.ui.status(`resource list changed on "${name}" (${resources.length} resources)`); + } + }, + prompts: { + onChanged: (error, prompts) => { + const server = this.servers.get(name); + if (error || !server || !prompts) return; + server.prompts = prompts; + this.ui.status(`prompt list changed on "${name}" (${prompts.length} prompts)`); + } + } + } + }); + client.onerror = error => this.ui.status(`[${name}] transport error: ${error.message}`); + this.registerSamplingHandler(client, name); + this.registerElicitationHandler(client, name); + this.registerRootsHandler(client); + return client; + } + + /** + * Sampling: the server borrows the host's model. The request is shown to the user and + * nothing is sent to the provider until they approve — a server must not be able to spend + * the user's API quota (or exfiltrate conversation context) silently. + */ + private registerSamplingHandler(client: Client, name: string): void { + client.setRequestHandler('sampling/createMessage', async request => { + const params = request.params; + // Show the user the full request they are approving — an abbreviated preview would + // mean approving something they haven't actually seen. + const requestText = [ + ...(params.systemPrompt ? [`system: ${params.systemPrompt}`] : []), + ...params.messages.map( + message => + `${message.role}: ${samplingContentToParts(message.content) + .map(part => (part.type === 'text' ? 
part.text : '[image]')) + .join(' ')}` + ) + ].join('\n'); + // Cap the spend regardless of what the server asked for, and approve what is actually sent. + const grantedMaxTokens = Math.min(params.maxTokens, SAMPLING_MAX_TOKENS_CAP); + const capNote = grantedMaxTokens === params.maxTokens ? '' : ` (server asked for ${params.maxTokens})`; + this.ui.attention( + `[sampling request]\nServer "${name}" wants to run an LLM request through your ${this.provider.name} provider (${grantedMaxTokens} max tokens${capNote}):\n\n${requestText}\n` + ); + const approved = await this.ui.confirm('Allow?'); + if (!approved) { + throw new ProtocolError(ProtocolErrorCode.InvalidRequest, 'User declined the sampling request'); + } + const stopSpinner = this.ui.spinner(); + let result: GenerateResult; + try { + result = await this.provider.generate({ + system: params.systemPrompt, + messages: params.messages.map(message => ({ role: message.role, content: samplingContentToParts(message.content) })), + maxTokens: grantedMaxTokens + }); + } finally { + stopSpinner(); + } + return { + role: 'assistant' as const, + content: { type: 'text' as const, text: result.text }, + model: result.model, + stopReason: result.stopReason === 'max_tokens' ? 'maxTokens' : 'endTurn' + }; + }); + } + + /** Elicitation: render the requested form (or URL) in the terminal; errors fail closed to cancel. */ + private registerElicitationHandler(client: Client, name: string): void { + client.setRequestHandler('elicitation/create', async (request): Promise => { + const params = request.params; + if (params.mode === 'url') { + this.ui.attention( + `[elicitation request]\nServer "${name}" needs you to complete a step in the browser:\n\n${params.url}\n` + ); + const opened = await this.ui.confirm('Open the URL and confirm once you are done. Continue?'); + return opened ? 
{ action: 'accept' } : { action: 'decline' }; + } + this.ui.attention(`[elicitation request]\nServer "${name}" is asking for input:\n\n${params.message}\n`); + return collectFormInput(this.ui, params.requestedSchema); + }); + } + + private registerRootsHandler(client: Client): void { + client.setRequestHandler('roots/list', () => ({ + roots: this.roots.map(root => ({ uri: pathToFileURL(root).href, name: path.basename(root) })) + })); + } + + private async connectServer(name: string, entry: ServerConfig): Promise { + const client = this.buildClient(name); + let httpTransport: StreamableHTTPClientTransport | undefined; + + if (isHttpServer(entry)) { + if (entry.headers && Object.keys(entry.headers).length > 0) { + // Static headers (e.g. a bearer token from the environment). No OAuth fallback — + // if the token is wrong the connection error is the more honest signal. + httpTransport = new StreamableHTTPClientTransport(new URL(entry.url), { requestInit: { headers: entry.headers } }); + await client.connect(httpTransport); + } else { + const callbackPort = this.oauthCallbackPort ?? (await findCallbackPort()); + const oauthProvider = createOAuthProvider(name, callbackPort); + httpTransport = new StreamableHTTPClientTransport(new URL(entry.url), { authProvider: oauthProvider }); + try { + await client.connect(httpTransport); + } catch (error) { + if (!unwrapUnauthorized(error)) throw error; + const finishTransport = httpTransport; + const authorized = await completeAuthorizationWithBrowser({ + serverName: name, + ui: this.ui, + provider: oauthProvider, + callbackPort, + finishAuth: params => finishTransport.finishAuth(params) + }); + if (!authorized) return undefined; + // finishAuth() exchanged the code on the old transport; reconnect on a fresh one. 
+ httpTransport = new StreamableHTTPClientTransport(new URL(entry.url), { authProvider: oauthProvider }); + await client.connect(httpTransport); + } + } + } else { + const transport = new StdioClientTransport({ + command: entry.command, + args: entry.args, + // The child gets the SDK's minimal default environment plus exactly what the + // config lists — never the host's full environment (API keys stay here). + env: entry.env, + cwd: entry.cwd, + stderr: 'pipe' + }); + transport.stderr?.on('data', (chunk: Buffer) => { + const line = String(chunk).trim(); + if (line) this.ui.serverLog(name, 'stderr', line); + }); + await client.connect(transport); + } + + try { + const era = client.getProtocolEra() === 'modern' ? 'modern' : 'legacy'; + const capabilities = client.getServerCapabilities(); + + client.setNotificationHandler('notifications/message', notification => { + const { level, data, logger } = notification.params; + this.ui.serverLog(name, `${logger ? `${logger} ` : ''}${level}`, typeof data === 'string' ? data : JSON.stringify(data)); + }); + client.setNotificationHandler('notifications/resources/updated', notification => { + this.ui.note(`resource updated: @${name}:${notification.params.uri}`); + }); + if (era === 'legacy' && capabilities?.logging) { + await client.setLoggingLevel('info').catch(() => {}); + } + + // Discovery is gated on the advertised capabilities and degrades per call: a server + // may advertise a capability and still not implement every list method + // (resources/templates/list is the usual gap). One failed listing costs the host an + // empty list and a status line, not the whole connection. + const listOrEmpty = async (label: string, advertised: unknown, list: () => Promise): Promise => { + if (!advertised) return []; + return list().catch((error: unknown) => { + this.ui.status(`listing ${label} on "${name}" failed: ${error instanceof Error ? 
error.message : String(error)}`); + return []; + }); + }; + const [tools, resources, resourceTemplates, prompts] = await Promise.all([ + listOrEmpty('tools', capabilities?.tools, () => client.listTools().then(result => result.tools)), + listOrEmpty('resources', capabilities?.resources, () => client.listResources().then(result => result.resources)), + listOrEmpty('resource templates', capabilities?.resources, () => + client.listResourceTemplates().then(result => result.resourceTemplates) + ), + listOrEmpty('prompts', capabilities?.prompts, () => client.listPrompts().then(result => result.prompts)) + ]); + + return { + name, + key: sanitizeServerName(name), + client, + era, + protocolVersion: client.getNegotiatedProtocolVersion() ?? 'unknown', + httpTransport, + instructions: client.getInstructions(), + tools, + resources, + resourceTemplates, + prompts + }; + } catch (error) { + // Don't leak a connected client when post-connect setup fails. + await client.close().catch(() => {}); + throw error; + } + } +} diff --git a/examples/cli-client/host/loop.ts b/examples/cli-client/host/loop.ts new file mode 100644 index 0000000000..7eb79f4abe --- /dev/null +++ b/examples/cli-client/host/loop.ts @@ -0,0 +1,270 @@ +import { getDisplayName } from '@modelcontextprotocol/client'; + +import type { ChatMessage, ContentPart, GenerateResult, LLMProvider } from '../providers/provider'; +import { textPart } from '../providers/provider'; +import { partsToDisplayText } from './content'; +import type { McpHost } from './host'; +import type { HostUI } from './ui'; + +/** A model that keeps calling tools forever is a bug, not a feature — bound the loop. */ +export const MAX_TOOL_ROUNDS = 8; + +const BASE_SYSTEM_PROMPT = + 'You are cli-client, a terminal assistant. You have no built-in tools; every tool available to you comes from a connected MCP server. ' + + 'Use them when they help, report tool failures honestly, and keep answers short — this is a terminal. 
' + + 'When the user greets you or asks what you can do, offer a short tour of what the connected servers provide (their instructions may suggest one).'; + +export interface ChatSession { + host: McpHost; + provider: LLMProvider; + ui: HostUI; + messages: ChatMessage[]; + maxTokens: number; + /** Last model id reported by the provider; announced once so users can see what answered. */ + announcedModel?: string; +} + +export function createSession(host: McpHost, provider: LLMProvider, ui: HostUI, maxTokens = 1024): ChatSession { + return { host, provider, ui, messages: [], maxTokens }; +} + +export function buildSystemPrompt(host: McpHost): string { + const instructions = host.systemInstructions(); + return instructions ? `${BASE_SYSTEM_PROMPT}\n\n${instructions}` : BASE_SYSTEM_PROMPT; +} + +/** + * The loop at the heart of every MCP host: + * ask the model → execute every tool call it issued → feed the results back → repeat until + * the model answers in prose (or the round cap is hit). Tool results go back as `role: 'tool'` + * messages so each provider can encode them natively, and `isError` results still go to the + * model — it is allowed to read the error and try something else. + */ +//#region theLoop +export async function runModelRounds(session: ChatSession): Promise { + const { host, provider, ui } = session; + // Server instructions and the aggregated tool list are stable within a single user turn. + const system = buildSystemPrompt(host); + const tools = host.toolDefinitions(); + for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { + const stopSpinner = ui.spinner(); + let result: GenerateResult; + try { + result = await provider.generate({ + system, + messages: session.messages, + tools, + maxTokens: session.maxTokens + }); + } finally { + stopSpinner(); + } + session.messages.push({ + role: 'assistant', + content: result.text ? [textPart(result.text)] : [], + ...(result.toolCalls.length > 0 ? 
{ toolCalls: result.toolCalls } : {}) + }); + if (result.model !== session.announcedModel) { + session.announcedModel = result.model; + ui.status(`model: ${result.model}`); + } + if (result.text) ui.print(result.text); + if (result.toolCalls.length === 0) return; + + for (const call of result.toolCalls) { + ui.status(`→ ${call.name} ${JSON.stringify(call.arguments)}`); + // Long-running calls stay cancellable: Ctrl-C aborts this call (the SDK sends + // notifications/cancelled) and the failure goes back to the model like any other. + const cancellation = new AbortController(); + ui.setCancelHandler(() => { + ui.status(`cancelling ${call.name}…`, 'cancel'); + cancellation.abort(); + }); + let parts: ContentPart[]; + let isError: boolean; + try { + ({ parts, isError } = await host.executeToolCall(call, { signal: cancellation.signal })); + } finally { + ui.setCancelHandler(undefined); + } + const summary = partsToDisplayText(parts); + ui.status(`${isError ? '✗' : '✓'} ${call.name}: ${summary.length > 200 ? `${summary.slice(0, 200)}…` : summary}`); + session.messages.push({ role: 'tool', toolCallId: call.id, toolName: call.name, content: parts, isError }); + } + } + ui.print('(stopped: tool-call round limit reached)'); +} +//#endregion theLoop + +/** Send one user turn (with optional attached-resource context blocks) through the loop. */ +export async function runConversationTurn(session: ChatSession, userText: string, attachments: string[] = []): Promise { + const content: ContentPart[] = [...attachments.map(attachment => textPart(attachment)), textPart(userText)]; + session.messages.push({ role: 'user', content }); + await runModelRounds(session); +} + +/** Pull `@server:uri` mentions out of a chat line (server names may contain dots, spaces excepted). 
*/ +export function extractMentions(input: string): { text: string; mentions: string[] } { + const mentions = [...input.matchAll(/@([^\s:@]+:\S+)/g)].map(match => match[1]).filter(mention => mention !== undefined); + return { text: input.trim(), mentions }; +} + +/** Parse `key=value` arguments for a `/server:prompt` command. */ +export function parsePromptArgs(rest: string): Record { + const args: Record = {}; + for (const [, key, raw] of rest.matchAll(/([A-Za-z0-9_-]+)=("[^"]*"|\S+)/g)) { + if (key && raw !== undefined) { + args[key] = raw.replaceAll(/^"|"$/g, ''); + } + } + return args; +} + +const HELP = `cli-client commands: + /help show this help + /servers connected servers and what they offer + /tools every (namespaced) tool the model can call + /resources resources you can attach with @server:uri + /prompts prompts you can run as /server:prompt-name [key=value …] + /roots workspace roots exposed to servers + /root add add a workspace root (sends roots/list_changed) + /watch @server:uri get a note whenever that resource changes + /quit exit + @server:uri attach a resource to your next message as context + /server:prompt-name k=v … run an MCP prompt as a slash command + Ctrl-C cancel the tool call that is currently running (otherwise exit)`; + +export type InputResult = 'continue' | 'exit'; + +/** Print rows as an aligned two-column listing, one line per row, trimmed to the terminal width. */ +function printAligned(ui: HostUI, rows: ReadonlyArray, emptyMessage: string): void { + if (rows.length === 0) { + ui.print(emptyMessage); + return; + } + const nameWidth = Math.min(Math.max(...rows.map(row => row[0]?.length ?? 0), 0), 48); + const columns = process.stdout.columns ?? 120; + for (const [name = '', description = ''] of rows) { + const line = `${name.padEnd(nameWidth)} ${description}`; + ui.print(line.length > columns ? 
`${line.slice(0, columns - 1)}…` : line); + } +} + +/** + * Dispatch one line of user input: built-in slash commands, `/server:prompt` commands, + * or a plain chat message (with `@server:uri` attachments resolved first). + */ +export async function handleUserInput(session: ChatSession, input: string): Promise { + const { host, ui } = session; + const trimmed = input.trim(); + if (!trimmed) return 'continue'; + + if (trimmed === '/quit' || trimmed === '/exit') return 'exit'; + if (trimmed === '/help') { + ui.print(HELP); + return 'continue'; + } + if (trimmed === '/servers') { + printAligned( + ui, + [...host.servers.values()].map(server => [ + server.name, + `protocol ${server.protocolVersion}, ${server.tools.length} tools, ${server.resources.length} resources (+${server.resourceTemplates.length} templates), ${server.prompts.length} prompts` + ]), + '[no servers connected]' + ); + return 'continue'; + } + if (trimmed === '/tools') { + printAligned( + ui, + host.toolDefinitions().map(tool => [tool.name, tool.description ?? '']), + '[no tools found — the connected servers expose none]' + ); + return 'continue'; + } + if (trimmed === '/resources') { + printAligned( + ui, + host.listResources().map(({ server, resource }) => [`@${server}:${resource.uri}`, getDisplayName(resource)]), + '[no resources found — the connected servers expose none]' + ); + return 'continue'; + } + if (trimmed === '/prompts') { + printAligned( + ui, + host.listPrompts().map(({ server, prompt }) => { + const args = (prompt.arguments ?? []).map(argument => `${argument.name}${argument.required ? '' : '?'}`).join(' '); + return [`/${server}:${prompt.name}${args ? ` ${args}` : ''}`, prompt.description ?? 
'']; + }), + '[no prompts found — the connected servers expose none]' + ); + return 'continue'; + } + if (trimmed === '/roots') { + for (const root of host.listRoots()) ui.print(root); + return 'continue'; + } + if (trimmed.startsWith('/root add ')) { + await host.addRoot(trimmed.slice('/root add '.length).trim()); + ui.status('root added'); + return 'continue'; + } + if (trimmed === '/watch' || trimmed.startsWith('/watch ')) { + const reference = trimmed.slice('/watch'.length).trim().replace(/^@/, ''); + if (!reference) { + ui.print('usage: /watch @server:uri (see /resources)'); + return 'continue'; + } + try { + await host.watchResource(reference); + ui.note(`watching @${reference} — you'll get a note when it changes`); + } catch (error) { + ui.status(`could not watch @${reference}: ${error instanceof Error ? error.message : String(error)}`); + } + return 'continue'; + } + + // `/server:prompt-name key=value …` — MCP prompts become slash commands. + const promptCommand = trimmed.match(/^\/([A-Za-z0-9_-]+):(\S+)\s*(.*)$/); + if (promptCommand) { + const serverName = promptCommand[1] ?? ''; + const promptName = promptCommand[2] ?? ''; + const rest = promptCommand[3] ?? ''; + const found = host.findPrompt(serverName, promptName); + if (!found) { + ui.print(`Unknown prompt: /${serverName}:${promptName} (see /prompts)`); + return 'continue'; + } + const args = parsePromptArgs(rest); + for (const argument of found.prompt.arguments ?? []) { + if (argument.required && args[argument.name] === undefined) { + args[argument.name] = await ui.ask( + `[prompt argument] ${argument.name}${argument.description ? ` (${argument.description})` : ''}` + ); + } + } + // The prompt's messages seed the conversation as-is — user and assistant turns stay + // distinct turns rather than being flattened into one block of text. 
+ const messages = await host.getPromptMessages(found.server.name, found.prompt.name, args); + session.messages.push(...messages); + await runModelRounds(session); + return 'continue'; + } + + const { text, mentions } = extractMentions(trimmed); + const attachments: string[] = []; + for (const mention of mentions) { + try { + attachments.push(await host.attachResource(mention)); + ui.note(`attached resource @${mention} as context`); + } catch (error) { + ui.status( + `could not attach @${mention}: ${error instanceof Error ? error.message : String(error)} — mentions look like @server:uri (see /resources)` + ); + } + } + await runConversationTurn(session, text, attachments); + return 'continue'; +} diff --git a/examples/cli-client/host/naming.ts b/examples/cli-client/host/naming.ts new file mode 100644 index 0000000000..faa78aacff --- /dev/null +++ b/examples/cli-client/host/naming.ts @@ -0,0 +1,22 @@ +/** + * Per-server tool names are namespaced `mcp____` before they reach the model + * (the same scheme Claude Code uses), so two servers can both expose `search` and the host + * can always route a model-issued call back to the server that owns it. 
+ */ +export function sanitizeServerName(name: string): string { + return name.replaceAll(/[^a-zA-Z0-9_-]/g, '_'); +} + +export function namespaceTool(serverKey: string, toolName: string): string { + return `mcp__${serverKey}__${toolName}`; +} + +export function routeNamespacedTool(name: string, serverKeys: string[]): { serverKey: string; toolName: string } | undefined { + for (const serverKey of serverKeys.toSorted((a, b) => b.length - a.length)) { + const prefix = `mcp__${serverKey}__`; + if (name.startsWith(prefix) && name.length > prefix.length) { + return { serverKey, toolName: name.slice(prefix.length) }; + } + } + return undefined; +} diff --git a/examples/cli-client/host/ui.ts b/examples/cli-client/host/ui.ts new file mode 100644 index 0000000000..d0f0509484 --- /dev/null +++ b/examples/cli-client/host/ui.ts @@ -0,0 +1,377 @@ +import type { Interface } from 'node:readline/promises'; +import { createInterface } from 'node:readline/promises'; + +import type { ElicitRequestFormParams, ElicitResult } from '@modelcontextprotocol/client'; + +import { stripAnsi } from './content'; +import type { McpHost } from './host'; + +// Minimal styling so a conversation is scannable: user input stays the terminal default, +// assistant prose is slightly dimmed, operational lines (tool calls, progress, logs) are grey, +// and anything that needs the user's decision (elicitation, approvals) is highlighted. +// Only applied on a TTY, so piped output stays plain text. +const useColor = process.stdout.isTTY === true; + +function paint(style: string, text: string): string { + return useColor ? `[${style}m${text}` : text; +} + +/** + * Everything cli-client ever shows or asks a human goes through this interface, so the e2e + * driver (client.ts) can swap in a scripted implementation and the rest of the host code + * stays identical. The two `confirm` call sites that matter for safety are the sampling + * approval gate and the OAuth browser-open prompt. 
+ */ +export interface HostUI { + /** Assistant output and primary information. */ + print(text: string): void; + /** Transient operational lines: tool calls, progress, connection events. A 'cancel' tone marks user cancellations. */ + status(text: string, tone?: 'info' | 'cancel'): void; + /** Something that needs the user's decision next (elicitation forms, approval requests). */ + attention(text: string): void; + /** Something that just became part of the conversation but isn't prose (an attached resource). */ + note(text: string): void; + /** A log notification received from a server. */ + serverLog(server: string, level: string, text: string): void; + /** Yes/no decision gate. Must default to "no" on uncertainty. */ + confirm(question: string): Promise; + /** Free-form question (elicitation form fields, prompt arguments). */ + ask(question: string): Promise; + /** Show a "working…" indicator until the returned stop function is called. */ + spinner(): () => void; + /** While set, an interrupt (Ctrl-C) calls the handler instead of exiting the CLI. */ + setCancelHandler(handler: (() => void) | undefined): void; +} + +/** + * Just enough Markdown for a terminal — headings, bold, italic, inline code, bullets, and + * horizontal rules — so model prose doesn't read as raw `**` markup. Deliberately not a parser: + * anything it doesn't recognise passes through untouched. + */ +export function renderMarkdownLite(text: string): string { + return text + .split('\n') + .map(line => { + if (/^\s*(?:---+|\*\*\*+)\s*$/.test(line)) return paint('2', '─'.repeat(40)); + if (line.startsWith('```')) return paint('2', line); + const heading = /^(#{1,6})\s+(.*)$/.exec(line); + if (heading) return paint('1;4', heading[2] ?? ''); + let rendered = line.replace(/^(\s*)[-*]\s+/, '$1• '); + rendered = rendered.replaceAll(/\*\*([^*]+)\*\*/g, (_match, inner: string) => paint('1', inner)); + rendered = rendered.replaceAll(/(? 
paint('3', inner)); + rendered = rendered.replaceAll(/`([^`]+)`/g, (_match, inner: string) => paint('36', inner)); + return rendered; + }) + .join('\n'); +} + +const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; +const SPINNER_WORDS = ['Pondering', 'Scheming', 'Brewing', 'Conjuring', 'Mulling', 'Percolating', 'Noodling', 'Ruminating']; + +export class ReadlineUI implements HostUI { + private readonly rl: Interface; + /** Whether the previous output line was operational, so prose gets a separating blank line. */ + private afterMeta = false; + /** Whether an attention block (sampling/elicitation/authorization) is open and awaiting its closing rule. */ + private inAttentionBlock = false; + private spinnerTimer?: NodeJS.Timeout; + private cancelHandler?: () => void; + + /** Ctrl-C cancels the in-flight tool call when one is running, and exits the CLI otherwise. */ + private readonly handleInterrupt = (): void => { + if (this.cancelHandler) { + this.cancelHandler(); + return; + } + this.rl.close(); + // eslint-disable-next-line unicorn/no-process-exit + process.exit(130); + }; + + constructor(rl?: Interface) { + this.rl = rl ?? createInterface({ input: process.stdin, output: process.stdout }); + this.rl.on('SIGINT', this.handleInterrupt); + process.on('SIGINT', this.handleInterrupt); + } + + /** Everything that isn't user input or assistant prose shares a two-space gutter and stays on one line. */ + private clipToWidth(text: string): string { + const columns = process.stdout.columns ?? 120; + return text.length > columns ? `${text.slice(0, Math.max(0, columns - 1))}…` : text; + } + + private horizontalRule(): string { + return paint('2;33', ` ${'─'.repeat(Math.max(10, Math.min(process.stdout.columns ?? 80, 100) - 2))}`); + } + + /** If a spinner is animating, wipe its line so real output never lands next to it. 
*/ + private clearSpinnerLine(): void { + if (this.spinnerTimer) this.clearLine(); + } + + /** Attention blocks are framed by horizontal rules; the closing rule prints when other output resumes. */ + private closeAttentionBlock(): void { + if (!this.inAttentionBlock) return; + console.log(this.horizontalRule()); + this.inAttentionBlock = false; + this.afterMeta = true; + } + + /** The chat prompt itself also goes through the shared readline instance. */ + async readUserInput(): Promise { + this.closeAttentionBlock(); + const answer = await this.rl.question(paint('1', '\n> ')); + console.log(); + this.afterMeta = false; + return answer.trim(); + } + + print(text: string): void { + // The assistant's prose: rendered with just-enough Markdown at the left margin, separated + // from any operational lines above so the conversation is easy to scan. + this.clearSpinnerLine(); + this.closeAttentionBlock(); + if (this.afterMeta) console.log(); + console.log(renderMarkdownLite(stripAnsi(text))); + this.afterMeta = false; + } + + status(text: string, tone: 'info' | 'cancel' = 'info'): void { + // Operational lines (tool calls, progress, connection events): grey italic, one line each. + // User cancellations get a red tag so they stand out from routine chatter. + this.clearSpinnerLine(); + this.closeAttentionBlock(); + if (tone === 'cancel') { + console.log(` ${paint('1;31', '[user cancellation]')} ${paint('90', this.clipToWidth(stripAnsi(text)))}`); + } else { + console.log(paint('3;90', this.clipToWidth(` · ${stripAnsi(text)}`))); + } + this.afterMeta = true; + } + + attention(text: string): void { + // The user has to act on this next (elicitation form, approval): highlighted, framed by + // horizontal rules, with the first line as the block label and the rest indented under it. 
+ this.clearSpinnerLine(); + if (!this.inAttentionBlock) { + console.log(this.horizontalRule()); + this.inAttentionBlock = true; + } + console.log(paint('1;33', ` ${stripAnsi(text).replaceAll('\n', '\n ')}`)); + this.afterMeta = true; + } + + note(text: string): void { + // Things that became part of the conversation but aren't prose (attached resources). + this.clearSpinnerLine(); + this.closeAttentionBlock(); + console.log(paint('36', this.clipToWidth(` ▍ ${stripAnsi(text)}`))); + this.afterMeta = true; + } + + serverLog(server: string, level: string, text: string): void { + // Lines that originate on the server (notifications/message, child stderr) carry a tag. + this.clearSpinnerLine(); + this.closeAttentionBlock(); + const tag = level === 'stderr' ? `[${server} stderr]` : `[${server} notification]`; + const body = level === 'stderr' ? stripAnsi(text) : `${level}: ${stripAnsi(text)}`; + console.log(` ${paint('35', tag)} ${paint('90', this.clipToWidth(body))}`); + this.afterMeta = true; + } + + async confirm(question: string): Promise { + this.clearSpinnerLine(); + const raw = await this.rl.question(paint('1;33', ` ${stripAnsi(question)} [y/N] `)); + const answer = raw.trim().toLowerCase(); + this.afterMeta = true; + return answer === 'y' || answer === 'yes'; + } + + async ask(question: string): Promise { + this.clearSpinnerLine(); + const answer = await this.rl.question(paint('1;33', ` ${stripAnsi(question)}: `)); + this.afterMeta = true; + return answer.trim(); + } + + /** Stops the spinner and wipes its line; safe to call when no spinner is running. */ + private readonly stopSpinner = (): void => { + if (this.spinnerTimer) clearInterval(this.spinnerTimer); + this.spinnerTimer = undefined; + this.clearLine(); + }; + + /** Wipe the current terminal line (used to erase an in-place spinner frame). 
*/ + private clearLine(): void { + if (process.stdout.isTTY) process.stdout.write('\r'); + } + + spinner(): () => void { + // Animated "the model is thinking" line; redrawn in place and wiped before any real output. + if (!process.stdout.isTTY || this.spinnerTimer) return this.stopSpinner; + const startedAt = Date.now(); + let tick = 0; + const render = (): void => { + const word = SPINNER_WORDS[Math.floor((Date.now() - startedAt) / 4000) % SPINNER_WORDS.length] ?? 'Working'; + const seconds = Math.round((Date.now() - startedAt) / 1000); + const frame = SPINNER_FRAMES[tick % SPINNER_FRAMES.length] ?? '·'; + process.stdout.write(`\r${paint('3;90', ` ${frame} ${word}… (${seconds}s)`)}`); + tick++; + }; + render(); + this.spinnerTimer = setInterval(render, 120); + return this.stopSpinner; + } + + setCancelHandler(handler: (() => void) | undefined): void { + this.cancelHandler = handler; + } + + close(): void { + this.rl.close(); + } +} + +type FormSchema = ElicitRequestFormParams['requestedSchema']; +type FormField = FormSchema['properties'][string]; +type FormValue = string | number | boolean | string[]; + +/** Build the one-line prompt for a single elicitation form field. */ +export function describeField(name: string, field: FormField, required: boolean): string { + const pieces: string[] = [`${field.title ?? name}`]; + if (field.description) pieces.push(`(${field.description})`); + if ('enum' in field && field.enum) pieces.push(`[options: ${field.enum.join(', ')}]`); + if (field.type === 'boolean') pieces.push('[yes/no]'); + if ((field.type === 'number' || field.type === 'integer') && (field.minimum !== undefined || field.maximum !== undefined)) { + pieces.push(`[${field.minimum ?? ''}..${field.maximum ?? ''}]`); + } + if ('default' in field && field.default !== undefined) { + pieces.push(`[default: ${String(field.default)}]`, '(Enter for the default)'); + } else { + pieces.push(required ? 
'(required)' : '(optional — Enter to skip)'); + } + return pieces.join(' '); +} + +/** Parse one raw answer according to the field's primitive type; undefined means "invalid". */ +export function parseFieldAnswer(field: FormField, answer: string): FormValue | undefined { + if (field.type === 'boolean') { + const lowered = answer.toLowerCase(); + if (['y', 'yes', 'true'].includes(lowered)) return true; + if (['n', 'no', 'false'].includes(lowered)) return false; + return undefined; + } + if (field.type === 'number' || field.type === 'integer') { + const value = Number(answer); + if (Number.isNaN(value)) return undefined; + if (field.type === 'integer' && !Number.isInteger(value)) return undefined; + if (field.minimum !== undefined && value < field.minimum) return undefined; + if (field.maximum !== undefined && value > field.maximum) return undefined; + return value; + } + if (field.type === 'array') { + return answer + .split(',') + .map(item => item.trim()) + .filter(item => item.length > 0); + } + if ('enum' in field && field.enum && !field.enum.includes(answer)) return undefined; + return answer; +} + +/** + * Walk an elicitation form schema field by field, collecting answers through the UI. + * The user can answer `decline` or `cancel` at any field; errors fail closed to cancel. + * Note the three distinct outcomes — decline ("no") and cancel ("dismissed") are not the same. + */ +export async function collectFormInput(ui: HostUI, schema: FormSchema): Promise { + try { + const required = schema.required ?? 
[]; + const content: Record = {}; + for (const [name, field] of Object.entries(schema.properties)) { + const isRequired = required.includes(name); + for (let attempt = 0; attempt < 3; attempt++) { + const answer = await ui.ask(describeField(name, field, isRequired)); + if (answer.toLowerCase() === 'decline') return { action: 'decline' }; + if (answer.toLowerCase() === 'cancel') return { action: 'cancel' }; + if (answer === '') { + if ('default' in field && field.default !== undefined) { + content[name] = field.default as FormValue; + break; + } + if (!isRequired) break; + ui.attention('this field is required (or answer "decline" / "cancel")'); + continue; + } + const value = parseFieldAnswer(field, answer); + if (value === undefined) { + ui.attention('invalid value, try again'); + continue; + } + content[name] = value; + break; + } + if (isRequired && !(name in content)) { + // Never return an accept that violates the requested schema. + ui.status('no valid answer for a required field — cancelling'); + return { action: 'cancel' }; + } + } + return { action: 'accept', content }; + } catch { + return { action: 'cancel' }; + } +} + +const BUILTIN_COMMANDS = ['/help', '/servers', '/tools', '/resources', '/prompts', '/roots', '/root add ', '/quit']; + +/** + * Tab completion for the interactive CLI: slash commands and prompt names complete from the + * connected servers' prompt lists, `@server:uri` mentions complete from their resource lists, + * and prompt argument values complete through MCP `completion/complete` — the same data a + * richer host would put behind its picker UI. Tab completes the common prefix; a second Tab + * lists the remaining options (readline's standard behavior). 
+ */ +export function createCompleter(getHost: () => McpHost | undefined): (line: string) => Promise<[string[], string]> { + return async line => { + try { + const host = getHost(); + if (!host) return [[], line]; + + // `@server:uri` mentions — complete the current word from the cached resource lists. + const mention = /(^|\s)(@\S*)$/.exec(line)?.[2]; + if (mention !== undefined) { + const candidates = [ + ...[...host.servers.keys()].map(name => `@${name}:`), + ...host.listResources().map(({ server, resource }) => `@${server}:${resource.uri}`) + ]; + return [candidates.filter(candidate => candidate.startsWith(mention)), mention]; + } + + // `/server:prompt arg=value …` — complete argument names, and argument values via completion/complete. + const promptArgs = /^\/([A-Za-z0-9_-]+):(\S+)\s+(?:.*\s)?([A-Za-z0-9_-]*)(=?)([^\s=]*)$/.exec(line); + if (promptArgs) { + const [, serverName = '', promptName = '', argumentName = '', equals = '', partial = ''] = promptArgs; + const found = host.findPrompt(serverName, promptName); + if (!found) return [[], line]; + if (equals === '=') { + const values = await host.completePromptArgument(found.server.name, found.prompt.name, argumentName, partial); + const suggestions = values.map(value => (/\s/.test(value) ? `${argumentName}="${value}"` : `${argumentName}=${value}`)); + return [suggestions, `${argumentName}=${partial}`]; + } + const names = (found.prompt.arguments ?? []).map(argument => `${argument.name}=`); + return [names.filter(name => name.startsWith(argumentName)), argumentName]; + } + + // Slash commands and prompt commands. 
+ if (line.startsWith('/') && !line.includes(' ')) { + const candidates = [...BUILTIN_COMMANDS, ...host.listPrompts().map(({ server, prompt }) => `/${server}:${prompt.name} `)]; + return [candidates.filter(candidate => candidate.startsWith(line)), line]; + } + + return [[], line]; + } catch { + return [[], line]; + } + }; +} diff --git a/examples/cli-client/package.json b/examples/cli-client/package.json new file mode 100644 index 0000000000..7a44590361 --- /dev/null +++ b/examples/cli-client/package.json @@ -0,0 +1,33 @@ +{ + "name": "@mcp-examples/cli-client", + "private": true, + "type": "module", + "scripts": { + "start": "tsx cli.ts", + "client": "tsx client.ts", + "test": "vitest run", + "test:watch": "vitest" + }, + "dependencies": { + "@anthropic-ai/sdk": "^0.74.0", + "@google/genai": "^1.0.0", + "@mcp-examples/shared": "workspace:*", + "@modelcontextprotocol/client": "workspace:*", + "open": "^11.0.0", + "openai": "^6.0.0", + "zod": "catalog:runtimeShared" + }, + "devDependencies": { + "tsx": "catalog:devTools", + "vitest": "catalog:devTools" + }, + "example": { + "era": "dual", + "timeoutMs": 60000, + "expects": { + "stdout": "cli-client e2e: all checks passed" + }, + "shapeExempt": "Reference host, not a single-feature story: a multi-file package (host/, providers/, script/, test/) whose server lives in the sibling examples/todos-server package. server.ts here is a thin shim so the runner's http legs can spawn the paired server.", + "//": "client.ts replays a scripted conversation (ScriptedProvider, no API keys) against the sibling todos-server. All four transport/era legs run; the sampling-backed prioritize tool, the multi-round brainstorm flow, and the elicitation-confirmed clear_done tool are exercised on the modern legs and on stdio/legacy, and skipped on http/legacy (createMcpHandler's stateless legacy posture has no return path for push-style server→client requests). cli.ts is the interactive entry for humans." 
+ } +} diff --git a/examples/cli-client/providers/anthropic.ts b/examples/cli-client/providers/anthropic.ts new file mode 100644 index 0000000000..701fa03f03 --- /dev/null +++ b/examples/cli-client/providers/anthropic.ts @@ -0,0 +1,141 @@ +import Anthropic from '@anthropic-ai/sdk'; + +import type { ChatMessage, ContentPart, GenerateRequest, GenerateResult, LLMProvider, ToolCall } from './provider'; +import { isRecord } from './provider'; + +/** One provider-neutral content part → one Anthropic block (text passthrough, supported images, placeholder otherwise). */ +function partToBlock(part: ContentPart): Anthropic.TextBlockParam | Anthropic.ImageBlockParam { + if (part.type === 'text') { + return { type: 'text', text: part.text }; + } + if ( + part.mimeType === 'image/jpeg' || + part.mimeType === 'image/png' || + part.mimeType === 'image/gif' || + part.mimeType === 'image/webp' + ) { + return { type: 'image', source: { type: 'base64', media_type: part.mimeType, data: part.data } }; + } + return { type: 'text', text: `[image omitted: unsupported media type ${part.mimeType}]` }; +} + +function toContentBlocks(message: ChatMessage): Anthropic.ContentBlockParam[] { + return message.content.filter(part => part.type !== 'text' || part.text.length > 0).map(part => partToBlock(part)); +} + +/** + * Convert the provider-neutral request into Anthropic Messages API parameters. + * + * The mapping every host writes for Claude: + * - MCP tool definitions pass straight through — `inputSchema` is already JSON Schema. + * - Assistant tool calls become `tool_use` blocks; tool results become `tool_result` blocks + * inside a *user* message, and results for parallel tool calls must share one user message. + * - `isError` from MCP becomes `is_error` so the model knows the call failed. 
+ */ +export function toAnthropicRequest(request: GenerateRequest, model: string): Anthropic.MessageCreateParamsNonStreaming { + const messages: Anthropic.MessageParam[] = []; + + for (const message of request.messages) { + if (message.role === 'tool') { + const resultBlock: Anthropic.ToolResultBlockParam = { + type: 'tool_result', + tool_use_id: message.toolCallId, + is_error: message.isError ?? false, + content: message.content.map(part => partToBlock(part)) + }; + const previous = messages.at(-1); + if (previous?.role === 'user' && Array.isArray(previous.content)) { + previous.content.push(resultBlock); + } else { + messages.push({ role: 'user', content: [resultBlock] }); + } + continue; + } + + if (message.role === 'assistant') { + const blocks: Anthropic.ContentBlockParam[] = toContentBlocks(message); + for (const call of message.toolCalls ?? []) { + blocks.push({ type: 'tool_use', id: call.id, name: call.name, input: call.arguments }); + } + if (blocks.length > 0) messages.push({ role: 'assistant', content: blocks }); + continue; + } + + const blocks = toContentBlocks(message); + if (blocks.length > 0) messages.push({ role: 'user', content: blocks }); + } + + return { + model, + max_tokens: request.maxTokens ?? 1024, + ...(request.temperature === undefined ? {} : { temperature: request.temperature }), + ...(request.system === undefined ? {} : { system: request.system }), + messages, + tools: (request.tools ?? []).map(tool => ({ + name: tool.name, + description: tool.description ?? '', + input_schema: { ...tool.inputSchema, type: 'object' } + })) + }; +} + +/** Pull text + tool calls back out of an Anthropic response. 
*/
+export function fromAnthropicResponse(response: Anthropic.Message): GenerateResult {
+    const textParts: string[] = [];
+    const toolCalls: ToolCall[] = [];
+    // Only `text` and `tool_use` blocks are mapped; any other block type is ignored.
+    for (const block of response.content) {
+        if (block.type === 'text') {
+            textParts.push(block.text);
+        } else if (block.type === 'tool_use') {
+            // Non-object tool input is replaced by an empty argument record.
+            toolCalls.push({ id: block.id, name: block.name, arguments: isRecord(block.input) ? block.input : {} });
+        }
+    }
+    // `stop_sequence` is folded into 'end_turn'; anything unrecognised becomes 'other'.
+    const stopReason: GenerateResult['stopReason'] =
+        response.stop_reason === 'tool_use'
+            ? 'tool_use'
+            : response.stop_reason === 'max_tokens'
+              ? 'max_tokens'
+              : response.stop_reason === 'end_turn' || response.stop_reason === 'stop_sequence'
+                ? 'end_turn'
+                : 'other';
+    return { text: textParts.join('\n'), toolCalls, stopReason, model: response.model };
+}
+
+/** `LLMProvider` backed by the Anthropic SDK client; the model id is resolved on the first `generate()` call and then cached. */
+export class AnthropicProvider implements LLMProvider {
+    readonly name = 'anthropic';
+    private readonly client: Anthropic;
+    private model?: string;
+
+    /**
+     * @param model Explicit model id; falls back to `ANTHROPIC_MODEL` when omitted.
+     * @throws Error when neither `ANTHROPIC_API_KEY` nor `ANTHROPIC_AUTH_TOKEN` is set.
+     */
+    constructor(model?: string) {
+        // The SDK reads either an API key or a bearer token (ANTHROPIC_AUTH_TOKEN) from the env.
+        if (!process.env.ANTHROPIC_API_KEY && !process.env.ANTHROPIC_AUTH_TOKEN) {
+            throw new Error('Neither ANTHROPIC_API_KEY nor ANTHROPIC_AUTH_TOKEN is set — export one or pick a different --provider');
+        }
+        this.client = new Anthropic();
+        this.model = model ?? process.env.ANTHROPIC_MODEL;
+    }
+
+    /**
+     * Model ids change faster than examples do, so nothing is hardcoded here: unless pinned
+     * via `--model` / `ANTHROPIC_MODEL`, ask the API for its model list and use the newest
+     * Sonnet-class (mid-tier) model.
+     */
+    private async resolveModel(): Promise<string> {
+        if (this.model) return this.model;
+        const models = await this.client.models.list({ limit: 100 });
+        // Newest first by `created_at`; `toSorted` leaves the fetched list untouched.
+        const newestSonnet = models.data
+            .filter(model => model.id.includes('sonnet'))
+            .toSorted((a, b) => Date.parse(b.created_at) - Date.parse(a.created_at))[0];
+        if (!newestSonnet) {
+            throw new Error('No Sonnet-class model found on the Anthropic API — pass --model or set ANTHROPIC_MODEL');
+        }
+        this.model = newestSonnet.id;
+        return this.model;
+    }
+
+    /** Resolve the model, send the mapped request, and map the response back to the provider-neutral shape. */
+    async generate(request: GenerateRequest): Promise<GenerateResult> {
+        const model = await this.resolveModel();
+        const response = await this.client.messages.create(toAnthropicRequest(request, model));
+        return fromAnthropicResponse(response);
+    }
+}
diff --git a/examples/cli-client/providers/gemini.ts b/examples/cli-client/providers/gemini.ts
new file mode 100644
index 0000000000..48aef0eb11
--- /dev/null
+++ b/examples/cli-client/providers/gemini.ts
@@ -0,0 +1,127 @@
+import type { Content, FunctionDeclaration, GenerateContentParameters, GenerateContentResponse, Part } from '@google/genai';
+import { GoogleGenAI } from '@google/genai';
+
+import type { ChatMessage, GenerateRequest, GenerateResult, LLMProvider, ToolCall } from './provider';
+import { isRecord, partsToText } from './provider';
+
+function toParts(message: ChatMessage): Part[] {
+    return message.content
+        .filter(part => part.type !== 'text' || part.text.length > 0)
+        .map(part => (part.type === 'text' ? { text: part.text } : { inlineData: { mimeType: part.mimeType, data: part.data } }));
+}
+
+/**
+ * Convert the provider-neutral request into `generateContent` parameters.
+ *
+ * The mapping every host writes for Gemini:
+ * - MCP `inputSchema` passes through as `parametersJsonSchema` (raw JSON Schema — no
+ *   conversion to the OpenAPI-style `parameters` subset needed).
+ * - Assistant tool calls become `functionCall` parts; tool results go back as
+ *   `functionResponse` parts keyed by the *function name* (Gemini has no call ids on the
+ *   wire, so cli-client's generated ids stay host-side).
+ * - Conversation roles are `user` / `model`.
+ */
+export function toGeminiRequest(request: GenerateRequest, model: string): GenerateContentParameters {
+    const contents: Content[] = [];
+    for (const message of request.messages) {
+        if (message.role === 'tool') {
+            const responsePart: Part = {
+                functionResponse: {
+                    name: message.toolName,
+                    response: { content: partsToText(message.content), ...(message.isError ? { isError: true } : {}) }
+                }
+            };
+            // Results for parallel function calls must arrive together in one user turn.
+            const previous = contents.at(-1);
+            if (previous?.role === 'user' && previous.parts?.every(part => part.functionResponse)) {
+                previous.parts.push(responsePart);
+            } else {
+                contents.push({ role: 'user', parts: [responsePart] });
+            }
+        } else if (message.role === 'assistant') {
+            const parts: Part[] = toParts(message);
+            for (const call of message.toolCalls ?? []) {
+                parts.push({ functionCall: { name: call.name, args: call.arguments } });
+            }
+            // Turns that map to no parts are dropped rather than sent empty.
+            if (parts.length > 0) contents.push({ role: 'model', parts });
+        } else {
+            const parts = toParts(message);
+            if (parts.length > 0) contents.push({ role: 'user', parts });
+        }
+    }
+
+    const functionDeclarations: FunctionDeclaration[] = (request.tools ?? []).map(tool => ({
+        name: tool.name,
+        description: tool.description ?? '',
+        parametersJsonSchema: tool.inputSchema
+    }));
+
+    return {
+        model,
+        contents,
+        config: {
+            ...(request.system === undefined ? {} : { systemInstruction: request.system }),
+            ...(request.maxTokens === undefined ? {} : { maxOutputTokens: request.maxTokens }),
+            ...(request.temperature === undefined ? {} : { temperature: request.temperature }),
+            ...(functionDeclarations.length > 0 ? { tools: [{ functionDeclarations }] } : {})
+        }
+    };
+}
+
+/** Pull text + tool calls back out of a `generateContent` response. */
+export function fromGeminiResponse(response: GenerateContentResponse, model: string): GenerateResult {
+    // Calls without an id get a positional `call_<n>` id (1-based within this response).
+    const toolCalls: ToolCall[] = (response.functionCalls ?? []).map((call, index) => ({
+        id: call.id ?? `call_${index + 1}`,
+        name: call.name ?? '',
+        arguments: isRecord(call.args) ? call.args : {}
+    }));
+    const finishReason = String(response.candidates?.[0]?.finishReason ?? '');
+    // Any function call wins over the reported finish reason.
+    const stopReason: GenerateResult['stopReason'] =
+        toolCalls.length > 0 ? 'tool_use' : finishReason === 'MAX_TOKENS' ? 'max_tokens' : finishReason === 'STOP' ? 'end_turn' : 'other';
+    return { text: response.text ?? '', toolCalls, stopReason, model };
+}
+
+/** `LLMProvider` backed by the `@google/genai` client; the model id is resolved on the first `generate()` call and then cached. */
+export class GeminiProvider implements LLMProvider {
+    readonly name = 'gemini';
+    private readonly client: GoogleGenAI;
+    private model?: string;
+
+    /**
+     * @param model Explicit model id; falls back to `GEMINI_MODEL` when omitted.
+     * @throws Error when `GEMINI_API_KEY` is not set.
+     */
+    constructor(model?: string) {
+        if (!process.env.GEMINI_API_KEY) {
+            throw new Error('GEMINI_API_KEY is not set — export it or pick a different --provider');
+        }
+        this.client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
+        this.model = model ?? process.env.GEMINI_MODEL;
+    }
+
+    /**
+     * Model ids change faster than examples do, so nothing is hardcoded here: unless pinned
+     * via `--model` / `GEMINI_MODEL`, ask the API for its model list and use the newest
+     * stable Flash (mid-tier) model.
+     */
+    private async resolveModel(): Promise<string> {
+        if (this.model) return this.model;
+        // Matches only plain `models/gemini-<version>-flash` names (no suffix after `-flash`).
+        const stableFlash = /^models\/gemini-(\d+(?:\.\d+)?)-flash$/;
+        let newest: { id: string; version: number } | undefined;
+        for await (const model of await this.client.models.list()) {
+            const name = model.name;
+            const match = name?.match(stableFlash);
+            if (!name || !match) continue;
+            const version = Number.parseFloat(match[1] ?? '0');
+            if (!newest || version > newest.version) {
+                newest = { id: name.replace(/^models\//, ''), version };
+            }
+        }
+        if (!newest) {
+            throw new Error('No stable gemini-<version>-flash model found on the Gemini API — pass --model or set GEMINI_MODEL');
+        }
+        this.model = newest.id;
+        return this.model;
+    }
+
+    /** Resolve the model, send the mapped request, and map the response back to the provider-neutral shape. */
+    async generate(request: GenerateRequest): Promise<GenerateResult> {
+        const model = await this.resolveModel();
+        const response = await this.client.models.generateContent(toGeminiRequest(request, model));
+        return fromGeminiResponse(response, model);
+    }
+}
diff --git a/examples/cli-client/providers/openai.ts b/examples/cli-client/providers/openai.ts
new file mode 100644
index 0000000000..b1d7b811e7
--- /dev/null
+++ b/examples/cli-client/providers/openai.ts
@@ -0,0 +1,139 @@
+import OpenAI from 'openai';
+
+import type { GenerateRequest, GenerateResult, LLMProvider, ToolCall } from './provider';
+import { isRecord, partsToText } from './provider';
+
+/**
+ * Convert the provider-neutral request into Chat Completions parameters.
+ *
+ * The mapping every host writes for OpenAI-compatible APIs:
+ * - MCP `inputSchema` passes straight through as the function `parameters` (JSON Schema).
+ * - Assistant tool calls become `tool_calls` with JSON-encoded arguments; tool results become
+ *   `role: 'tool'` messages keyed by `tool_call_id`.
+ * - Chat Completions tool messages are text-only, so failures are prefixed with `[tool error]`
+ *   and images are reduced to placeholders.
+ */
+export function toOpenAIRequest(request: GenerateRequest, model: string): OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming {
+    const chat: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [];
+    // An absent or empty system prompt produces no system message.
+    if (request.system) {
+        chat.push({ role: 'system', content: request.system });
+    }
+
+    for (const entry of request.messages) {
+        switch (entry.role) {
+            case 'tool': {
+                const resultText = partsToText(entry.content);
+                const content = entry.isError ? `[tool error] ${resultText}` : resultText;
+                chat.push({ role: 'tool', tool_call_id: entry.toolCallId, content });
+                break;
+            }
+            case 'assistant': {
+                const calls = (entry.toolCalls ?? []).map(call => ({
+                    id: call.id,
+                    type: 'function' as const,
+                    function: { name: call.name, arguments: JSON.stringify(call.arguments) }
+                }));
+                const prose = partsToText(entry.content);
+                // The API rejects assistant messages that carry neither content nor tool calls.
+                if (prose || calls.length > 0) {
+                    chat.push({
+                        role: 'assistant',
+                        content: prose || null,
+                        ...(calls.length > 0 ? { tool_calls: calls } : {})
+                    });
+                }
+                break;
+            }
+            default: {
+                // The remaining role is 'user': text stays text, images become base64 data URLs.
+                chat.push({
+                    role: 'user',
+                    content: entry.content.map(part =>
+                        part.type === 'text'
+                            ? { type: 'text' as const, text: part.text }
+                            : { type: 'image_url' as const, image_url: { url: `data:${part.mimeType};base64,${part.data}` } }
+                    )
+                });
+            }
+        }
+    }
+
+    const tools = request.tools ?? [];
+    return {
+        model,
+        messages: chat,
+        ...(request.maxTokens !== undefined ? { max_completion_tokens: request.maxTokens } : {}),
+        ...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
+        // The `tools` key is omitted entirely when there are no tools.
+        ...(tools.length > 0
+            ? {
+                  tools: tools.map(tool => ({
+                      type: 'function' as const,
+                      function: { name: tool.name, description: tool.description ?? '', parameters: tool.inputSchema }
+                  }))
+              }
+            : {})
+    };
+}
+
+/** Pull text + tool calls back out of a Chat Completions response.
*/
+export function fromOpenAIResponse(response: OpenAI.Chat.Completions.ChatCompletion): GenerateResult {
+    // Only the first choice is read.
+    const choice = response.choices[0];
+    const toolCalls: ToolCall[] = [];
+    for (const call of choice?.message.tool_calls ?? []) {
+        if (call.type !== 'function') continue;
+        let parsed: unknown;
+        try {
+            parsed = JSON.parse(call.function.arguments || '{}');
+        } catch {
+            // Deliberate best-effort: unparseable arguments degrade to an empty record.
+            parsed = {};
+        }
+        toolCalls.push({ id: call.id, name: call.function.name, arguments: isRecord(parsed) ? parsed : {} });
+    }
+    const finishReason = choice?.finish_reason;
+    const stopReason: GenerateResult['stopReason'] =
+        finishReason === 'tool_calls'
+            ? 'tool_use'
+            : finishReason === 'length'
+              ? 'max_tokens'
+              : finishReason === 'stop'
+                ? 'end_turn'
+                : 'other';
+    return { text: choice?.message.content ?? '', toolCalls, stopReason, model: response.model };
+}
+
+/**
+ * Works against api.openai.com by default; point `OPENAI_BASE_URL` at any Chat-Completions
+ * compatible endpoint (Gemini's compatibility layer, Ollama, vLLM, …) to reuse this mapping.
+ */
+export class OpenAIProvider implements LLMProvider {
+    readonly name = 'openai';
+    private readonly client: OpenAI;
+    private model?: string;
+
+    /**
+     * @param model Explicit model id; falls back to `OPENAI_MODEL` when omitted.
+     * @throws Error when `OPENAI_API_KEY` is not set.
+     */
+    constructor(model?: string) {
+        if (!process.env.OPENAI_API_KEY) {
+            throw new Error('OPENAI_API_KEY is not set — export it or pick a different --provider');
+        }
+        this.client = new OpenAI({ baseURL: process.env.OPENAI_BASE_URL });
+        this.model = model ?? process.env.OPENAI_MODEL;
+    }
+
+    /**
+     * Model ids change faster than examples do, so nothing is hardcoded here: unless pinned
+     * via `--model` / `OPENAI_MODEL`, ask the API for its model list and use the newest
+     * mainline `gpt-<version>` model (the mid-tier one — not -pro, -mini, or -nano variants).
+     */
+    private async resolveModel(): Promise<string> {
+        if (this.model) return this.model;
+        // Anchored on both ends, so ids with any suffix after the version do not match.
+        const mainline = /^gpt-\d+(?:\.\d+)?$/;
+        let newest: { id: string; created: number } | undefined;
+        for await (const model of this.client.models.list()) {
+            if (mainline.test(model.id) && (!newest || model.created > newest.created)) {
+                newest = model;
+            }
+        }
+        if (!newest) {
+            throw new Error('No mainline gpt-<version> model found on the OpenAI API — pass --model or set OPENAI_MODEL');
+        }
+        this.model = newest.id;
+        return this.model;
+    }
+
+    /** Resolve the model, send the mapped request, and map the response back to the provider-neutral shape. */
+    async generate(request: GenerateRequest): Promise<GenerateResult> {
+        const model = await this.resolveModel();
+        const response = await this.client.chat.completions.create(toOpenAIRequest(request, model));
+        return fromOpenAIResponse(response);
+    }
+}
diff --git a/examples/cli-client/providers/provider.ts b/examples/cli-client/providers/provider.ts
new file mode 100644
index 0000000000..5877c248b0
--- /dev/null
+++ b/examples/cli-client/providers/provider.ts
@@ -0,0 +1,73 @@
+/**
+ * The seam where MCP meets the model.
+ *
+ * `LLMProvider` is the only thing the rest of cli-client knows about a language model: given
+ * the conversation so far and the MCP tools currently available, produce the next assistant
+ * turn (text and/or tool calls). Each file in providers/ is the complete mapping for one
+ * provider API — if you are building your own host, copy the one for the provider you use.
+ *
+ * The same interface serves both directions: the chat loop calls it to drive the
+ * conversation, and the MCP sampling handler calls it to answer `sampling/createMessage`
+ * requests from servers.
+ */
+
+//#region llmProvider
+export interface ToolDefinition {
+    /** Namespaced tool name as exposed to the model (e.g. `mcp__todos__add_task`). */
+    name: string;
+    description?: string;
+    /** JSON Schema for the tool's arguments, passed through from the MCP `Tool.inputSchema`.
*/
+    inputSchema: Record<string, unknown>;
+}
+
+export type ContentPart = { type: 'text'; text: string } | { type: 'image'; mimeType: string; data: string };
+
+export interface ToolCall {
+    /** Provider-assigned id, echoed back on the matching `role: 'tool'` message. */
+    id: string;
+    /** Namespaced tool name (matches a `ToolDefinition.name`). */
+    name: string;
+    arguments: Record<string, unknown>;
+}
+
+export type ChatMessage =
+    | { role: 'user'; content: ContentPart[] }
+    | { role: 'assistant'; content: ContentPart[]; toolCalls?: ToolCall[] }
+    | { role: 'tool'; toolCallId: string; toolName: string; content: ContentPart[]; isError?: boolean };
+
+export interface GenerateRequest {
+    system?: string;
+    messages: ChatMessage[];
+    tools?: ToolDefinition[];
+    maxTokens?: number;
+    temperature?: number;
+}
+
+export interface GenerateResult {
+    /** Assistant prose (may be empty when the model only calls tools). */
+    text: string;
+    /** Tool calls the host must execute and feed back as `role: 'tool'` messages. */
+    toolCalls: ToolCall[];
+    stopReason: 'end_turn' | 'tool_use' | 'max_tokens' | 'other';
+    /** Provider-reported model id (also used to answer MCP sampling requests). */
+    model: string;
+}
+
+export interface LLMProvider {
+    readonly name: string;
+    generate(request: GenerateRequest): Promise<GenerateResult>;
+}
+//#endregion llmProvider
+
+/** Wrap a string as a single text content part. */
+export function textPart(text: string): ContentPart {
+    return { type: 'text', text };
+}
+
+/** Flatten content parts to plain text, replacing non-text parts with a placeholder. */
+export function partsToText(parts: ContentPart[]): string {
+    return parts.map(part => (part.type === 'text' ? part.text : `[image: ${part.mimeType}]`)).join('\n');
+}
+
+/** True for non-null, non-array objects. */
+export function isRecord(value: unknown): value is Record<string, unknown> {
+    return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
diff --git a/examples/cli-client/providers/scripted.ts b/examples/cli-client/providers/scripted.ts
new file mode 100644
index 0000000000..20bc1de865
--- /dev/null
+++ b/examples/cli-client/providers/scripted.ts
@@ -0,0 +1,46 @@
+import type { GenerateRequest, GenerateResult, LLMProvider, ToolCall } from './provider';
+
+export interface ScriptedTurn {
+    /** Optional inspection hook — the e2e driver uses it to assert on the request the host built. */
+    expect?: (request: GenerateRequest) => void;
+    text?: string;
+    toolCalls?: ToolCall[];
+}
+
+/**
+ * Replays a fixed sequence of assistant turns. No keys, no network — this is what CI runs
+ * (`client.ts`) and what `--provider scripted` gives you locally. Each `generate()` call
+ * consumes the next turn in order; because the MCP sampling handler goes through the same
+ * provider, sampling requests consume turns too.
+ */
+export class ScriptedProvider implements LLMProvider {
+    readonly name = 'scripted';
+    private next = 0;
+
+    constructor(private readonly turns: ScriptedTurn[] = []) {}
+
+    /** Turns that have not been consumed yet (the e2e driver asserts this reaches 0). */
+    get remaining(): number {
+        return Math.max(0, this.turns.length - this.next);
+    }
+
+    /** Return the next scripted turn, or a fixed placeholder reply once the script is exhausted. */
+    generate(request: GenerateRequest): Promise<GenerateResult> {
+        const turn = this.turns[this.next++];
+        if (!turn) {
+            return Promise.resolve({
+                text: '(scripted provider has no turns left — run with --provider anthropic|openai|gemini for a real model)',
+                toolCalls: [],
+                stopReason: 'end_turn',
+                model: 'scripted'
+            });
+        }
+        // Runs synchronously, so a throwing hook throws out of generate() instead of rejecting the promise.
+        turn.expect?.(request);
+        const toolCalls = turn.toolCalls ?? [];
+        return Promise.resolve({
+            text: turn.text ?? '',
+            toolCalls,
+            stopReason: toolCalls.length > 0 ? 'tool_use' : 'end_turn',
+            model: 'scripted'
+        });
+    }
+}
diff --git a/examples/cli-client/script/scriptedUi.ts b/examples/cli-client/script/scriptedUi.ts
new file mode 100644
index 0000000000..3bc5eeb2ee
--- /dev/null
+++ b/examples/cli-client/script/scriptedUi.ts
@@ -0,0 +1,83 @@
+import type { HostUI } from '../host/ui';
+
+/**
+ * The UI used by the e2e driver: answers come from pre-loaded queues instead of a human, and
+ * everything the host would have shown is recorded so the driver can assert on it afterwards.
+ * Unanswered confirmations fail closed (false), like a human who walked away.
+ */
+export class ScriptedUI implements HostUI {
+    readonly printed: string[] = [];
+    readonly statuses: string[] = [];
+    readonly serverLogs: Array<{ server: string; level: string; text: string }> = [];
+    readonly questions: string[] = [];
+
+    private readonly confirmAnswers: boolean[];
+    private readonly askAnswers: string[];
+    private cancelHandler?: () => void;
+    /** When set, the next status() line that includes this substring fires the in-flight tool-call cancel handler once. */
+    cancelOnStatusMatching?: string;
+
+    constructor(options: { confirmAnswers?: boolean[]; askAnswers?: string[] } = {}) {
+        // Copy the queues so consuming answers never mutates the caller's arrays.
+        this.confirmAnswers = [...(options.confirmAnswers ?? [])];
+        this.askAnswers = [...(options.askAnswers ?? [])];
+    }
+
+    get unansweredConfirms(): number {
+        return this.confirmAnswers.length;
+    }
+
+    get unansweredAsks(): number {
+        return this.askAnswers.length;
+    }
+
+    print(text: string): void {
+        this.printed.push(text);
+        console.log(text);
+    }
+
+    attention(text: string): void {
+        this.printed.push(text);
+        console.log(text);
+    }
+
+    status(text: string): void {
+        this.statuses.push(text);
+        console.log(`  · ${text}`);
+        if (this.cancelOnStatusMatching && text.includes(this.cancelOnStatusMatching) && this.cancelHandler) {
+            // Disarm before firing so the handler runs at most once per arming.
+            this.cancelOnStatusMatching = undefined;
+            this.cancelHandler();
+        }
+    }
+
+    note(text: string): void {
+        this.statuses.push(text);
+        console.log(`  ▍ ${text}`);
+    }
+
+    serverLog(server: string, level: string, text: string): void {
+        this.serverLogs.push({ server, level, text });
+        console.log(`  [${server}] ${level}: ${text}`);
+    }
+
+    /** Record the question and answer from the queue; an empty queue answers `false`. */
+    confirm(question: string): Promise<boolean> {
+        this.questions.push(question);
+        return Promise.resolve(this.confirmAnswers.shift() ?? false);
+    }
+
+    /** Record the question and answer from the queue; an empty queue answers `''`. */
+    ask(question: string): Promise<string> {
+        this.questions.push(question);
+        return Promise.resolve(this.askAnswers.shift() ?? '');
+    }
+
+    spinner(): () => void {
+        return noop;
+    }
+
+    setCancelHandler(handler: (() => void) | undefined): void {
+        this.cancelHandler = handler;
+    }
+}
+
+function noop(): void {
+    // The scripted driver has no spinner to stop.
+}
diff --git a/examples/cli-client/script/session.ts b/examples/cli-client/script/session.ts
new file mode 100644
index 0000000000..215d15bf39
--- /dev/null
+++ b/examples/cli-client/script/session.ts
@@ -0,0 +1,343 @@
+import { check } from '@mcp-examples/shared';
+
+import type { McpHost } from '../host/host';
+import type { ChatMessage, GenerateRequest } from '../providers/provider';
+import type { ScriptedProvider, ScriptedTurn } from '../providers/scripted';
+import type { ScriptedUI } from './scriptedUi';
+
+/**
+ * The scripted e2e conversation the CI driver replays against the sibling todos-server.
+ * Each provider turn plays the model's part and asserts on the request the host built for it,
+ * so a passing run proves the loop, namespacing, resource attachment, prompt-role handling,
+ * sampling, and elicitation all actually round-tripped.
+ */
+export interface ScriptedSession {
+    turns: ScriptedTurn[];
+    inputs: string[];
+    confirmAnswers: boolean[];
+    askAnswers: string[];
+    /** Hooks the driver runs once before each input is dispatched (e.g. to arm cancellation). */
+    beforeInput?: Array<((ui: ScriptedUI) => void) | undefined>;
+    verify(context: { ui: ScriptedUI; provider: ScriptedProvider; host: McpHost; era: string; transport: string }): Promise<void>;
+}
+
+/** Flatten a message's content to text, rendering every non-text part as `[image]`. */
+function messageText(message: ChatMessage): string {
+    return message.content.map(part => (part.type === 'text' ? part.text : `[image]`)).join('\n');
+}
+
+/** Return the final message of a request, failing the check when the request has none. */
+function lastMessage(request: GenerateRequest): ChatMessage {
+    const message = request.messages.at(-1);
+    check.ok(message, 'expected at least one message');
+    return message;
+}
+
+export function buildScriptedSession(options: { interactive: boolean }): ScriptedSession {
+    const { interactive } = options;
+    const turns: ScriptedTurn[] = [];
+    const beforeInput: Array<((ui: ScriptedUI) => void) | undefined> = [];
+    const inputs: string[] = ['Add a task to write the Q3 report by Friday, high priority.'];
+
+    // 1. Plain chat → the model calls a namespaced tool, the result comes back, it answers.
+ turns.push( + { + expect: request => { + check.ok(request.system?.includes('todo board'), 'system prompt should fold in the todos server instructions'); + check.ok( + request.tools?.some(tool => tool.name === 'mcp__todos__add_task'), + 'aggregated tools should include the namespaced add_task' + ); + }, + toolCalls: [ + { + id: 'call_add', + name: 'mcp__todos__add_task', + arguments: { title: 'Write the Q3 report', project: 'planning', priority: 'high', due: 'Friday' } + } + ] + }, + { + expect: request => { + const message = lastMessage(request); + check.equal(message.role, 'tool', 'tool result should be fed back as a tool message'); + check.ok(message.role === 'tool' && message.isError !== true, 'add_task should not error'); + check.ok(messageText(message).includes('Write the Q3 report'), 'tool result should mention the new task'); + }, + text: 'Added "Write the Q3 report" to the board as a high-priority task due Friday.' + } + ); + + // 2. Bulk add → per-item progress notifications stream while the tool runs. + inputs.push('Also add tasks to update the cli-client docs and to fix the flaky deploy test.'); + turns.push( + { + toolCalls: [ + { + id: 'call_bulk', + name: 'mcp__todos__add_tasks', + arguments: { + tasks: [ + { title: 'Update the cli-client docs', project: 'cli-client' }, + { title: 'Fix the flaky deploy test', project: 'ops' } + ] + } + } + ] + }, + { + expect: request => { + check.ok(messageText(lastMessage(request)).includes('Added 2 task(s)'), 'add_tasks should report both tasks'); + }, + text: 'Added both tasks to the board.' + } + ); + + // 3. @-mention → the board resource is injected as provenance-labelled context. 
+ inputs.push('@todos:todos://board which of these should I tackle first?'); + turns.push({ + expect: request => { + const message = lastMessage(request); + check.equal(message.role, 'user'); + const text = messageText(message); + check.ok(text.includes(''), 'resource context should carry provenance'); + check.ok(text.includes('Write the Q3 report'), 'attached board should already contain the new task'); + }, + text: 'Start with the Q3 report — it is high priority and due Friday.' + }); + + // Watch the board: a host command (no model turn); later mutations should produce update notes. + inputs.push('/watch @todos:todos://board', '/todos:plan-my-day focus=cli-client'); + turns.push({ + expect: request => { + const fromPrompt = request.messages.filter( + message => message.role === 'assistant' && messageText(message).includes('I can see your board') + ); + check.equal(fromPrompt.length, 1, 'the prompt-provided assistant turn should stay an assistant turn'); + check.ok( + request.messages.some(message => message.role === 'user' && messageText(message).includes('"cli-client" project')), + 'the prompt argument should appear in the seeded user turn' + ); + }, + text: 'Plan for today: 1) Review the cli-client pull request, 2) Send standup notes to the team.' + }); + + if (interactive) { + // 4a. The deepest multi-round flow: brainstorm_tasks (theme+count elicitation form, + // then approval-gated sampling, with HMAC-signed requestState carried between rounds). 
+ inputs.push('Brainstorm a few tasks for me.'); + turns.push( + { toolCalls: [{ id: 'call_brainstorm', name: 'mcp__todos__brainstorm_tasks', arguments: {} }] }, + { + expect: request => { + check.ok(!request.tools?.length, 'the brainstorm sampling request should not carry the chat tools'); + check.ok( + messageText(lastMessage(request)).includes('Invent 5 todo tasks'), + 'the brainstorm sampling request should carry the resolved theme and count' + ); + }, + text: ['Reboot the flux capacitor', 'Explain the snorkel cluster outage', 'Convince Jenkins to behave'].join('\n') + }, + { + expect: request => { + const message = lastMessage(request); + check.equal(message.role, 'tool'); + check.ok(messageText(message).includes('Added 3 brainstormed task(s)'), 'brainstorm should report the tasks it added'); + }, + text: 'Three brainstormed tasks added to the board.' + } + ); + + // 4b. Sampling: the prioritize tool borrows the host's model (after the approval gate). + inputs.push('Prioritize my open tasks.'); + turns.push( + { toolCalls: [{ id: 'call_prioritize', name: 'mcp__todos__prioritize', arguments: {} }] }, + { + expect: request => { + check.ok(!request.tools?.length, 'sampling requests should not carry the chat tools'); + check.ok(request.system?.includes('prioritize todo lists'), 'sampling should pass the server systemPrompt through'); + check.ok( + messageText(lastMessage(request)).includes('Rank these tasks'), + 'sampling should carry the server-provided messages' + ); + }, + text: ['Write the Q3 report', 'Fix the flaky deploy test', 'Update the cli-client docs'].join('\n') + }, + { + expect: request => { + const message = lastMessage(request); + check.equal(message.role, 'tool'); + check.ok(messageText(message).includes('Re-prioritized'), 'prioritize result should report the new ranking'); + }, + text: 'Done — I ranked your open tasks and updated their priorities.' + } + ); + } + + // 5. Another tool round (gives clear_done something to delete). 
+ inputs.push('Mark the flaky deploy test task as done.'); + turns.push( + { toolCalls: [{ id: 'call_complete', name: 'mcp__todos__complete_task', arguments: { task: 'flaky deploy' } }] }, + { + expect: request => { + check.ok(messageText(lastMessage(request)).includes('Marked'), 'complete_task result should confirm'); + }, + text: 'Marked "Fix the flaky deploy test" as done.' + } + ); + + if (interactive) { + // 6. Elicitation: clear_done asks for confirmation through a terminal form. + inputs.push('Clear my completed tasks.'); + turns.push( + { toolCalls: [{ id: 'call_clear', name: 'mcp__todos__clear_done', arguments: {} }] }, + { + expect: request => { + check.ok(messageText(lastMessage(request)).includes('Deleted'), 'clear_done should report how many tasks it deleted'); + }, + text: 'Cleared the completed tasks from the board.' + } + ); + } + + // Finale: a long-running tool — work through whatever is still open, with live progress. + inputs.push('Now work through everything that is still open.'); + turns.push( + { toolCalls: [{ id: 'call_work', name: 'mcp__todos__work_through_tasks', arguments: { secondsPerTask: 0.3 } }] }, + { + expect: request => { + check.ok(messageText(lastMessage(request)).includes('Worked through'), 'work_through_tasks should report what it finished'); + }, + text: 'All done — every open task has been worked through.' + } + ); + + // Cancellation: add fresh tasks, start a slow work-through, and abort it on the first + // progress line — proving the host signal abort → notifications/cancelled → tool-error + // path round-trips and the model is told. 
+ inputs.push('Add a couple of placeholder tasks and start working through them.'); + beforeInput[inputs.length - 1] = ui => { + ui.cancelOnStatusMatching = 'mcp__todos__work_through_tasks: finished'; + }; + turns.push( + { + toolCalls: [ + { + id: 'call_seed_cancel', + name: 'mcp__todos__add_tasks', + arguments: { + tasks: [ + { title: 'Placeholder task A', project: 'cancel-test' }, + { title: 'Placeholder task B', project: 'cancel-test' } + ] + } + }, + { id: 'call_work_cancel', name: 'mcp__todos__work_through_tasks', arguments: { secondsPerTask: 0.6 } } + ] + }, + { + expect: request => { + const message = lastMessage(request); + check.equal(message.role, 'tool'); + check.ok( + message.role === 'tool' && message.isError === true, + 'a cancelled tool call should reach the model as an error result' + ); + check.ok(messageText(message).includes('cancelled by the user'), 'the cancellation should be labelled'); + }, + text: 'Stopped — that work-through was cancelled.' + } + ); + + return { + turns, + inputs, + beforeInput, + // brainstorm: theme '' (Enter for default) + count '5'; clear_done: confirm 'y'. + // brainstorm + prioritize each gate one sampling approval. + confirmAnswers: interactive ? [true, true] : [], + askAnswers: interactive ? ['', '5', 'y'] : [], + async verify({ ui, provider, host, transport }) { + check.equal(provider.remaining, 0, 'every scripted model turn should have been consumed'); + check.equal(ui.unansweredConfirms, 0, 'every scripted confirmation should have been consumed'); + check.equal(ui.unansweredAsks, 0, 'every scripted form answer should have been consumed'); + + // End-state assertions against the live server, read the same way a user would. + const todos = host.servers.get('todos'); + check.ok(todos, 'the todos server should be connected'); + const board = await todos.client.readResource({ uri: 'todos://board' }); + const boardText = board.contents.map(item => ('text' in item ? 
item.text : '')).join('\n'); + check.ok(boardText.includes('Write the Q3 report'), 'the added task should be on the board'); + if (interactive) { + // 'low' can only come from the prioritize ranking — nothing else assigns a low priority. + check.ok(boardText.includes('priority: low'), 'prioritize should have stamped priorities'); + check.ok(!boardText.includes('Fix the flaky deploy test'), 'clear_done should have removed the completed task'); + } else { + check.ok(boardText.includes('[x] Fix the flaky deploy test'), 'complete_task should have marked the task done'); + } + + if (interactive) { + check.ok( + ui.printed.some(text => text.includes('wants to run an LLM request')), + 'the sampling approval gate should have been shown' + ); + check.ok( + ui.questions.some(question => question.includes('Allow?')), + 'the sampling approval question should have been asked' + ); + } + // completion/complete: the seed-board theme arg is completable() with a fixed list. + const themeCompletions = await host.completePromptArgument('todos', 'seed-board', 'theme', 'space'); + check.ok( + themeCompletions.includes('space-station maintenance'), + 'completion/complete should return matching completable() values for prompt arguments' + ); + const focusCompletions = await host.completePromptArgument('todos', 'plan-my-day', 'focus', ''); + check.ok(focusCompletions.length > 0, 'completion/complete should return current project names for plan-my-day focus'); + + check.ok( + ui.statuses.some(status => status.includes('watching @todos:todos://board')), + 'the /watch command should have subscribed to the board' + ); + check.ok(!ui.statuses.some(status => status.includes('could not watch')), 'the /watch subscription should not have failed'); + check.ok( + ui.statuses.some(status => status.includes('cancelling mcp__todos__work_through_tasks')), + 'the scripted cancellation should have fired' + ); + if (interactive) { + check.ok( + ui.questions.some(question => question.includes('Theme for the 
invented tasks')), + 'brainstorm_tasks should have elicited the theme/count form' + ); + check.ok(boardText.includes('Reboot the flux capacitor'), 'brainstormed tasks should be on the board'); + } + if (transport === 'stdio') { + // 'todos info' entries can only come from notifications/message — stderr lines are tagged 'stderr'. + check.ok( + ui.serverLogs.some(log => log.level.includes('info')), + 'server log notifications should have been rendered' + ); + check.ok( + ui.statuses.some(status => status.includes('resource list changed')), + 'resources/list_changed should have refreshed the cached list' + ); + } + if (transport === 'stdio') { + check.ok( + ui.statuses.some(status => status.includes('mcp__todos__add_tasks') && status.includes('(2/2)')), + 'progress notifications from add_tasks should have been rendered' + ); + check.ok( + ui.statuses.some(status => status.includes('mcp__todos__work_through_tasks') && status.includes('/')), + 'progress notifications from work_through_tasks should have been rendered' + ); + check.ok( + ui.serverLogs.some(log => log.text.includes('working on')), + 'work_through_tasks should narrate each task through log notifications' + ); + check.ok( + ui.statuses.some(status => status.includes('resource updated:')), + 'watching the board should have produced resources/updated notes' + ); + } + } + }; +} diff --git a/examples/cli-client/server.ts b/examples/cli-client/server.ts new file mode 100644 index 0000000000..17c0b4b612 --- /dev/null +++ b/examples/cli-client/server.ts @@ -0,0 +1,6 @@ +/** + * Runner shim: the cli-client story's server is the sibling examples/todos-server package. + * The example runner spawns `/server.ts` for http legs, so this file just executes + * the real entry (argv passes through untouched). 
+ */ +import '../todos-server/server'; diff --git a/examples/cli-client/test/auth.test.ts b/examples/cli-client/test/auth.test.ts new file mode 100644 index 0000000000..b0d00f770c --- /dev/null +++ b/examples/cli-client/test/auth.test.ts @@ -0,0 +1,142 @@ +import { describe, expect, it } from 'vitest'; + +import { + CliOAuthClientProvider, + completeAuthorizationWithBrowser, + createOAuthProvider, + findCallbackPort, + waitForOAuthCallback +} from '../host/auth'; +import { ScriptedUI } from '../script/scriptedUi'; + +/** Poll the loopback callback endpoint until the listener is up, then deliver the query. */ +async function deliverCallback(port: number, query: string): Promise { + for (let attempt = 0; attempt < 50; attempt++) { + try { + await fetch(`http://127.0.0.1:${port}/callback?${query}`); + return; + } catch { + await new Promise(resolve => setTimeout(resolve, 50)); + } + } + throw new Error('callback server never came up'); +} + +describe('CliOAuthClientProvider', () => { + it('round-trips the state the SDK stores on it and supports scoped invalidation', () => { + const provider = createOAuthProvider('todos', 8123); + expect(provider.clientMetadata.redirect_uris).toEqual(['http://127.0.0.1:8123/callback']); + + expect(provider.state()).toBe(provider.state()); + provider.saveTokens({ access_token: 'a', token_type: 'bearer' }); + provider.saveCodeVerifier('verifier'); + provider.saveClientInformation({ client_id: 'client' }); + expect(provider.tokens()?.access_token).toBe('a'); + expect(provider.codeVerifier()).toBe('verifier'); + provider.invalidateCredentials('tokens'); + expect(provider.tokens()).toBeUndefined(); + expect(provider.clientInformation()?.client_id).toBe('client'); + provider.invalidateCredentials('all'); + expect(provider.clientInformation()).toBeUndefined(); + }); + + it('defers the redirect instead of opening anything during connect()', () => { + const provider = new CliOAuthClientProvider('http://localhost:1/callback', { redirect_uris: 
['http://localhost:1/callback'] }); + provider.redirectToAuthorization(new URL('https://auth.example.com/authorize?state=s')); + expect(provider.pendingAuthorizationUrl?.hostname).toBe('auth.example.com'); + }); +}); + +describe('loopback callback server', () => { + it('resolves with the callback query parameters', async () => { + const port = await findCallbackPort(); + const callback = waitForOAuthCallback(port); + await deliverCallback(port, 'code=abc&state=xyz'); + const params = await callback; + expect(params.get('code')).toBe('abc'); + expect(params.get('state')).toBe('xyz'); + }); +}); + +describe('completeAuthorizationWithBrowser', () => { + it('does nothing when the user declines', async () => { + const provider = createOAuthProvider('todos', 8124); + provider.redirectToAuthorization(new URL('http://127.0.0.1:9/authorize')); + let exchanged = false; + const authorized = await completeAuthorizationWithBrowser({ + serverName: 'todos', + ui: new ScriptedUI({ confirmAnswers: [false] }), + provider, + callbackPort: 8124, + finishAuth: async () => { + exchanged = true; + } + }); + expect(authorized).toBe(false); + expect(exchanged).toBe(false); + }); + + it('refuses to open a non-https, non-loopback authorization URL', async () => { + const provider = createOAuthProvider('todos', 8125); + provider.redirectToAuthorization(new URL('http://auth.example.com/authorize')); + let opened = false; + const authorized = await completeAuthorizationWithBrowser({ + serverName: 'todos', + ui: new ScriptedUI({ confirmAnswers: [true] }), + provider, + callbackPort: 8125, + finishAuth: async () => {}, + openUrl: async () => { + opened = true; + } + }); + expect(authorized).toBe(false); + expect(opened).toBe(false); + }); + + it('rejects a callback whose state does not match', async () => { + const port = await findCallbackPort(); + const provider = createOAuthProvider('todos', port); + const expectedState = provider.state(); + provider.redirectToAuthorization(new 
URL(`http://127.0.0.1:9/authorize?state=${expectedState}`)); + let exchanged = false; + const pending = completeAuthorizationWithBrowser({ + serverName: 'todos', + ui: new ScriptedUI({ confirmAnswers: [true] }), + provider, + callbackPort: port, + finishAuth: async () => { + exchanged = true; + }, + openUrl: async () => {} + }); + await deliverCallback(port, 'code=abc&state=wrong'); + expect(await pending).toBe(false); + expect(exchanged).toBe(false); + }); + + it('exchanges the code when the state matches', async () => { + const port = await findCallbackPort(); + const provider = createOAuthProvider('todos', port); + const expectedState = provider.state(); + provider.redirectToAuthorization(new URL(`http://127.0.0.1:9/authorize?state=${expectedState}`)); + let receivedCode: string | null = null; + let openedUrl: string | undefined; + const pending = completeAuthorizationWithBrowser({ + serverName: 'todos', + ui: new ScriptedUI({ confirmAnswers: [true] }), + provider, + callbackPort: port, + finishAuth: async params => { + receivedCode = params.get('code'); + }, + openUrl: async url => { + openedUrl = url; + } + }); + await deliverCallback(port, `code=secret-code&state=${expectedState}`); + expect(await pending).toBe(true); + expect(receivedCode).toBe('secret-code'); + expect(openedUrl).toBe(`http://127.0.0.1:9/authorize?state=${expectedState}`); + }); +}); diff --git a/examples/cli-client/test/forms.test.ts b/examples/cli-client/test/forms.test.ts new file mode 100644 index 0000000000..02fbf06c7b --- /dev/null +++ b/examples/cli-client/test/forms.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it } from 'vitest'; + +import { extractMentions, parsePromptArgs } from '../host/loop'; +import { collectFormInput, describeField, parseFieldAnswer } from '../host/ui'; +import { ScriptedUI } from '../script/scriptedUi'; + +const SCHEMA = { + type: 'object' as const, + properties: { + confirm: { type: 'boolean' as const, title: 'Really?' 
}, + count: { type: 'string' as const, enum: ['5', '10', '20', '50', 'custom'] }, + name: { type: 'string' as const, description: 'Your name' } + }, + required: ['confirm', 'count'] +}; + +describe('elicitation form helpers', () => { + it('describes fields with their constraints', () => { + expect(describeField('confirm', SCHEMA.properties.confirm, true)).toContain('Really?'); + expect(describeField('confirm', SCHEMA.properties.confirm, true)).toContain('(required)'); + expect(describeField('count', SCHEMA.properties.count, false)).toContain('options: 5, 10, 20, 50, custom'); + }); + + it('parses answers per primitive type and rejects invalid values', () => { + expect(parseFieldAnswer({ type: 'boolean' }, 'y')).toBe(true); + expect(parseFieldAnswer({ type: 'boolean' }, 'maybe')).toBeUndefined(); + expect(parseFieldAnswer({ type: 'integer', minimum: 1, maximum: 10 }, '5')).toBe(5); + expect(parseFieldAnswer({ type: 'integer', minimum: 1, maximum: 10 }, '50')).toBeUndefined(); + expect(parseFieldAnswer({ type: 'integer' }, '2.5')).toBeUndefined(); + expect(parseFieldAnswer({ type: 'string', enum: ['a', 'b'] }, 'c')).toBeUndefined(); + expect(parseFieldAnswer({ type: 'array', items: { type: 'string', enum: ['x', 'y'] } }, 'x, y')).toEqual(['x', 'y']); + }); + + it('collects a full form through the UI', async () => { + const ui = new ScriptedUI({ askAnswers: ['y', '10', 'Felix'] }); + const result = await collectFormInput(ui, SCHEMA); + expect(result).toEqual({ action: 'accept', content: { confirm: true, count: '10', name: 'Felix' } }); + expect(ui.questions.some(question => question.includes('Really?'))).toBe(true); + }); + + it('treats decline and cancel as terminal answers and retries invalid input', async () => { + expect(await collectFormInput(new ScriptedUI({ askAnswers: ['decline'] }), SCHEMA)).toEqual({ action: 'decline' }); + expect(await collectFormInput(new ScriptedUI({ askAnswers: ['cancel'] }), SCHEMA)).toEqual({ action: 'cancel' }); + const retrying = new 
ScriptedUI({ askAnswers: ['maybe', 'y', '10', ''] }); + expect(await collectFormInput(retrying, SCHEMA)).toEqual({ action: 'accept', content: { confirm: true, count: '10' } }); + }); + + it('cancels rather than accepting when a required field never gets a valid answer', async () => { + const ui = new ScriptedUI({ askAnswers: ['maybe', 'maybe', 'maybe'] }); + expect(await collectFormInput(ui, SCHEMA)).toEqual({ action: 'cancel' }); + }); +}); + +describe('input parsing', () => { + it('extracts @server:uri mentions', () => { + const { text, mentions } = extractMentions('@todos:todos://board what should I do first?'); + expect(mentions).toEqual(['todos:todos://board']); + expect(text).toContain('what should I do first?'); + }); + + it('parses key=value prompt arguments, including quoted values', () => { + expect(parsePromptArgs('focus=cli-client note="ship it today"')).toEqual({ focus: 'cli-client', note: 'ship it today' }); + }); +}); diff --git a/examples/cli-client/test/host.test.ts b/examples/cli-client/test/host.test.ts new file mode 100644 index 0000000000..428b79a820 --- /dev/null +++ b/examples/cli-client/test/host.test.ts @@ -0,0 +1,96 @@ +import { describe, expect, it } from 'vitest'; + +import { configFromTargets, interpolateEnv, isHttpServer, parseConfig } from '../host/config'; +import { contentBlockToParts, resourceToContextText, stripAnsi, toolResultToParts, truncate } from '../host/content'; +import { namespaceTool, routeNamespacedTool, sanitizeServerName } from '../host/naming'; + +describe('tool namespacing and routing', () => { + it('sanitizes server names the way provider tool-name rules require', () => { + expect(sanitizeServerName('todos')).toBe('todos'); + expect(sanitizeServerName('my server.prod')).toBe('my_server_prod'); + }); + + it('routes namespaced calls back to the owning server, longest key first', () => { + const keys = ['todos', 'todos_staging']; + expect(routeNamespacedTool(namespaceTool('todos', 'add_task'), keys)).toEqual({ 
serverKey: 'todos', toolName: 'add_task' }); + expect(routeNamespacedTool('mcp__todos_staging__add_task', keys)).toEqual({ serverKey: 'todos_staging', toolName: 'add_task' }); + // Tool names may themselves contain double underscores. + expect(routeNamespacedTool('mcp__todos__weird__tool', keys)).toEqual({ serverKey: 'todos', toolName: 'weird__tool' }); + expect(routeNamespacedTool('mcp__unknown__x', keys)).toBeUndefined(); + expect(routeNamespacedTool('not-namespaced', keys)).toBeUndefined(); + }); +}); + +describe('content conversion', () => { + it('narrows every content block type', () => { + expect(contentBlockToParts({ type: 'text', text: 'hi' })).toEqual([{ type: 'text', text: 'hi' }]); + expect(contentBlockToParts({ type: 'image', data: 'abc', mimeType: 'image/png' })).toEqual([ + { type: 'image', mimeType: 'image/png', data: 'abc' } + ]); + expect(contentBlockToParts({ type: 'audio', data: 'abc', mimeType: 'audio/wav' })[0]?.type).toBe('text'); + expect(contentBlockToParts({ type: 'resource_link', uri: 'todos://board', name: 'board' })[0]).toMatchObject({ type: 'text' }); + expect(contentBlockToParts({ type: 'resource', resource: { uri: 'todos://board', text: 'open: 3' } })[0]).toMatchObject({ + type: 'text', + text: expect.stringContaining('open: 3') + }); + expect( + contentBlockToParts({ + type: 'resource', + resource: { uri: 'todos://blob', blob: 'aGk=', mimeType: 'application/octet-stream' } + })[0] + ).toMatchObject({ type: 'text', text: expect.stringContaining('binary resource') }); + }); + + it('returns a placeholder for empty tool results and surfaces isError separately', () => { + expect(toolResultToParts({ content: [] })).toEqual([{ type: 'text', text: '(tool returned no content)' }]); + }); + + it('caps injected content and labels resource context with provenance', () => { + expect(truncate('abc', 2)).toContain('[truncated 1 characters'); + const context = resourceToContextText('todos', 'todos://board', { contents: [{ uri: 'todos://board', text: 
'open: 3' }] }); + expect(context).toContain(''); + expect(context).toContain('open: 3'); + const binary = resourceToContextText('todos', 'todos://blob', { contents: [{ uri: 'todos://blob', blob: 'aGVsbG8=' }] }); + expect(binary).toContain('[binary content'); + }); + + it('strips ANSI escapes from server-provided text', () => { + expect(stripAnsi('red plain')).toBe('red plain'); + }); +}); + +describe('config parsing', () => { + it('accepts stdio and http entries and interpolates environment variables', () => { + const config = parseConfig( + JSON.stringify({ + mcpServers: { + todos: { command: 'npx', args: ['-y', 'tsx', 'server.ts'], env: { API_KEY: '${TEST_TOKEN}' } }, + remote: { url: 'https://example.com/mcp', headers: { Authorization: 'Bearer ${TEST_TOKEN}' } } + } + }), + { TEST_TOKEN: 'sekret' } + ); + const todos = config.mcpServers.todos; + const remote = config.mcpServers.remote; + expect(todos && !isHttpServer(todos) && todos.env?.API_KEY).toBe('sekret'); + expect(remote && isHttpServer(remote) && remote.headers?.Authorization).toBe('Bearer sekret'); + }); + + it('rejects entries that are neither stdio nor http', () => { + expect(() => parseConfig(JSON.stringify({ mcpServers: { broken: { nope: true } } }))).toThrow(); + }); + + it('leaves unknown ${VAR} references empty', () => { + expect(interpolateEnv('Bearer ${MISSING}', {})).toBe('Bearer '); + }); + + it('builds a config from ad-hoc --server targets', () => { + const config = configFromTargets(['https://mcp.linear.app/mcp', 'npx -y tsx server.ts']); + expect(config.mcpServers['linear']).toEqual({ url: 'https://mcp.linear.app/mcp' }); + expect(config.mcpServers['server']).toEqual({ command: 'npx', args: ['-y', 'tsx', 'server.ts'] }); + }); + + it('rejects an empty --server list', () => { + expect(() => configFromTargets([])).toThrow(); + }); +}); diff --git a/examples/cli-client/test/providers.test.ts b/examples/cli-client/test/providers.test.ts new file mode 100644 index 0000000000..fa42c37f6b --- 
/dev/null +++ b/examples/cli-client/test/providers.test.ts @@ -0,0 +1,168 @@ +import type Anthropic from '@anthropic-ai/sdk'; +import type { GenerateContentResponse } from '@google/genai'; +import type OpenAI from 'openai'; +import { describe, expect, it } from 'vitest'; + +import { fromAnthropicResponse, toAnthropicRequest } from '../providers/anthropic'; +import { fromGeminiResponse, toGeminiRequest } from '../providers/gemini'; +import { fromOpenAIResponse, toOpenAIRequest } from '../providers/openai'; +import type { ChatMessage, GenerateRequest } from '../providers/provider'; +import { ScriptedProvider } from '../providers/scripted'; + +const TOOL_SCHEMA = { type: 'object', properties: { title: { type: 'string' } }, required: ['title'] }; + +const conversation: ChatMessage[] = [ + { role: 'user', content: [{ type: 'text', text: 'add a task' }] }, + { + role: 'assistant', + content: [{ type: 'text', text: 'adding it' }], + toolCalls: [ + { id: 'call_1', name: 'mcp__todos__add_task', arguments: { title: 'Write the report' } }, + { id: 'call_2', name: 'mcp__todos__list_tasks', arguments: {} } + ] + }, + { role: 'tool', toolCallId: 'call_1', toolName: 'mcp__todos__add_task', content: [{ type: 'text', text: 'Added t1' }] }, + { role: 'tool', toolCallId: 'call_2', toolName: 'mcp__todos__list_tasks', content: [{ type: 'text', text: 'boom' }], isError: true } +]; + +const request: GenerateRequest = { + system: 'be helpful', + messages: conversation, + tools: [{ name: 'mcp__todos__add_task', description: 'Add a task', inputSchema: TOOL_SCHEMA }], + maxTokens: 256 +}; + +describe('anthropic mapping', () => { + it('builds a Messages API request with namespaced tools, tool_use blocks, and merged tool_result messages', () => { + const params = toAnthropicRequest(request, 'claude-test'); + expect(params.model).toBe('claude-test'); + expect(params.system).toBe('be helpful'); + expect(params.tools?.[0]).toMatchObject({ name: 'mcp__todos__add_task', input_schema: { type: 
'object' } }); + + expect(params.messages).toHaveLength(3); + const assistant = params.messages[1]; + expect(assistant?.role).toBe('assistant'); + const assistantBlocks = assistant?.content as Anthropic.ContentBlockParam[]; + expect(assistantBlocks.filter(block => block.type === 'tool_use')).toHaveLength(2); + + // Both tool results (parallel calls) must land in ONE user message. + const toolResults = params.messages[2]; + expect(toolResults?.role).toBe('user'); + const blocks = toolResults?.content as Anthropic.ToolResultBlockParam[]; + expect(blocks.map(block => block.type)).toEqual(['tool_result', 'tool_result']); + expect(blocks[0]?.is_error).toBe(false); + expect(blocks[1]?.is_error).toBe(true); + }); + + it('parses text, tool calls, and stop reasons from a response', () => { + const response = { + id: 'msg_1', + type: 'message', + role: 'assistant', + model: 'claude-test', + content: [ + { type: 'text', text: 'on it', citations: null }, + { type: 'tool_use', id: 'call_9', name: 'mcp__todos__add_task', input: { title: 'x' } } + ], + stop_reason: 'tool_use', + stop_sequence: null, + usage: { input_tokens: 1, output_tokens: 1 } + // The fixture only carries the fields the mapping reads. 
+ } as unknown as Anthropic.Message; + const result = fromAnthropicResponse(response); + expect(result.text).toBe('on it'); + expect(result.toolCalls).toEqual([{ id: 'call_9', name: 'mcp__todos__add_task', arguments: { title: 'x' } }]); + expect(result.stopReason).toBe('tool_use'); + }); +}); + +describe('openai mapping', () => { + it('builds a Chat Completions request with function tools, tool_calls, and tool-role results', () => { + const params = toOpenAIRequest(request, 'gpt-test'); + expect(params.model).toBe('gpt-test'); + expect(params.messages[0]).toEqual({ role: 'system', content: 'be helpful' }); + expect(params.tools?.[0]).toMatchObject({ type: 'function', function: { name: 'mcp__todos__add_task', parameters: TOOL_SCHEMA } }); + + const assistant = params.messages.find(message => message.role === 'assistant'); + expect(assistant && 'tool_calls' in assistant && assistant.tool_calls).toHaveLength(2); + const toolMessages = params.messages.filter(message => message.role === 'tool'); + expect(toolMessages).toHaveLength(2); + expect(toolMessages[1]?.content).toContain('[tool error]'); + }); + + it('parses tool calls (including malformed JSON arguments) from a response', () => { + const response = { + id: 'chatcmpl-1', + object: 'chat.completion', + created: 0, + model: 'gpt-test', + choices: [ + { + index: 0, + finish_reason: 'tool_calls', + logprobs: null, + message: { + role: 'assistant', + content: null, + refusal: null, + tool_calls: [ + { id: 'a', type: 'function', function: { name: 'mcp__todos__add_task', arguments: '{"title":"x"}' } }, + { id: 'b', type: 'function', function: { name: 'mcp__todos__list_tasks', arguments: 'not json' } } + ] + } + } + ] + } as unknown as OpenAI.Chat.Completions.ChatCompletion; + const result = fromOpenAIResponse(response); + expect(result.toolCalls).toEqual([ + { id: 'a', name: 'mcp__todos__add_task', arguments: { title: 'x' } }, + { id: 'b', name: 'mcp__todos__list_tasks', arguments: {} } + ]); + 
expect(result.stopReason).toBe('tool_use'); + }); +}); + +describe('gemini mapping', () => { + it('passes MCP JSON Schema through and maps tool results to functionResponse parts', () => { + const params = toGeminiRequest(request, 'gemini-test'); + expect(params.model).toBe('gemini-test'); + const config = params.config; + expect(config?.systemInstruction).toBe('be helpful'); + expect(config?.tools?.[0]).toMatchObject({ + functionDeclarations: [{ name: 'mcp__todos__add_task', parametersJsonSchema: TOOL_SCHEMA }] + }); + + const contents = params.contents as Array<{ role?: string; parts?: Array> }>; + expect(contents).toHaveLength(3); + expect(contents[1]?.role).toBe('model'); + expect(contents[1]?.parts?.some(part => 'functionCall' in part)).toBe(true); + // Results for parallel function calls must share one user turn. + expect(contents[2]?.parts?.[0]).toMatchObject({ functionResponse: { name: 'mcp__todos__add_task' } }); + expect(contents[2]?.parts?.[1]).toMatchObject({ functionResponse: { response: { isError: true } } }); + }); + + it('parses text and function calls from a response, generating ids when missing', () => { + const response = { + candidates: [{ content: { role: 'model', parts: [{ text: 'done' }] }, finishReason: 'STOP' }], + functionCalls: [{ name: 'mcp__todos__add_task', args: { title: 'x' } }], + text: 'done' + } as unknown as GenerateContentResponse; + const result = fromGeminiResponse(response, 'gemini-test'); + expect(result.text).toBe('done'); + expect(result.toolCalls).toEqual([{ id: 'call_1', name: 'mcp__todos__add_task', arguments: { title: 'x' } }]); + expect(result.stopReason).toBe('tool_use'); + }); +}); + +describe('scripted provider', () => { + it('replays turns in order and reports leftovers', async () => { + const provider = new ScriptedProvider([{ text: 'one' }, { toolCalls: [{ id: 'c', name: 't', arguments: {} }] }]); + const first = await provider.generate({ messages: [] }); + expect(first.text).toBe('one'); + const second = await 
provider.generate({ messages: [] }); + expect(second.stopReason).toBe('tool_use'); + expect(provider.remaining).toBe(0); + const exhausted = await provider.generate({ messages: [] }); + expect(exhausted.text).toContain('no turns left'); + }); +}); diff --git a/examples/cli-client/vitest.config.js b/examples/cli-client/vitest.config.js new file mode 100644 index 0000000000..e85733af91 --- /dev/null +++ b/examples/cli-client/vitest.config.js @@ -0,0 +1,8 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + environment: 'node', + include: ['test/**/*.test.ts'] + } +}); diff --git a/examples/todos-server/README.md b/examples/todos-server/README.md new file mode 100644 index 0000000000..d7abd3f282 --- /dev/null +++ b/examples/todos-server/README.md @@ -0,0 +1,81 @@ +# todos-server — the reference MCP server + +A small project todo board where **every server-side MCP feature has a real job**: tools that mutate state, resources that expose it, prompts that seed conversations, sampling that borrows the connected host's model, elicitation that asks the user, progress and logs while it works, and per-resource subscriptions that announce every change. It is the workload [`cli-client`](../cli-client/README.md) (the reference host) connects to out of the box — think of it as the "polls app" of MCP servers: small enough to read in one sitting, real enough that nothing in it is contrived. + +It serves **both protocol revisions at once** — 2026-07-28 and 2025-11-25 are negotiated per connection, from the same code — and **both transports**: stdio and Streamable HTTP. 
+ +## Run it + +From the repo root (first time: `pnpm install && pnpm build:all`): + +```bash +# stdio — for hosts that spawn their servers as child processes +pnpm --filter @mcp-examples/todos-server start + +# Streamable HTTP — for remote-style connections (default port 3000; --port to change) +pnpm --filter @mcp-examples/todos-server start:http +``` + +Over stdio the server speaks on stdin/stdout (its own diagnostics go to stderr). Over HTTP it serves `http://127.0.0.1:3000/mcp` via `createMcpHandler`'s per-request model. + +There is no era flag on the server: `serveStdio` and `createMcpHandler` detect each connection's revision during the handshake and pin the instance accordingly, so a 2025-era client and a 2026-era client can talk to the same process — simultaneously, over HTTP. + +## Connect cli-client to it + +```bash +# Two terminals: serve over HTTP, then point the reference host at it +pnpm --filter @mcp-examples/todos-server start:http # terminal A +pnpm --filter @mcp-examples/cli-client start -- --server http://127.0.0.1:3000/mcp # terminal B + +# Same, but force the 2025-era handshake on the client to see the legacy arm in action +pnpm --filter @mcp-examples/cli-client start -- --server http://127.0.0.1:3000/mcp --legacy +``` + +The client's status line shows what was negotiated: `connected to "todos" (2026-07-28, 8 tools, …)` vs `(2025-11-25, …)`. + +You don't need the HTTP step for a quick look — running `cli-client` with no arguments spawns this server over stdio automatically. 
+ +Any other `mcpServers`-style host can spawn it too: + +```jsonc +{ + "mcpServers": { + "todos": { "command": "npx", "args": ["-y", "tsx", "/absolute/path/to/examples/todos-server/server.ts"] } + } +} +``` + +## What demonstrates what + +| Server feature | Where it lives | Notes | +| -------------------------- | ------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Tools | `add_task`, `add_tasks`, `list_tasks`, `complete_task` | plain CRUD; `add_task` also returns `structuredContent` against an `outputSchema` | +| Sampling | `prioritize`, `brainstorm_tasks` | the server borrows the _host's_ model; the host shows the request for approval first | +| Elicitation (form) | `clear_done`, `brainstorm_tasks` | schema-driven forms; accept / decline / cancel all handled | +| Multi-round input_required | `brainstorm_tasks` | theme+count form → optional custom-amount round → sampling round; state rides `requestState` as a **step-discriminated union**, HMAC-signed via `createRequestStateCodec` | +| Progress + cancellation | `work_through_tasks`, `add_tasks` | paced per-task progress notifications; `work_through_tasks` checks `ctx.mcpReq.signal` between tasks and stops early when the host cancels | +| Logging | every mutating tool, via `ctx.mcpReq.log` | honours `logging/setLevel` on 2025 connections and the per-request log-level `_meta` opt-in on 2026-07-28 | +| Resources | `todos://board`, `todos://tasks/{id}` | one concrete resource + a `ResourceTemplate` with a completion callback for task ids | +| Subscriptions | the board | `resources/subscribe`/`unsubscribe` handlers for 2025-era clients; `subscriptions/listen` routing for 2026-07-28; every mutation notifies | +| list_changed | every mutation | resource list + resource updated notifications, delivered correctly over stdio and per-request HTTP | 
+| Prompts + completions | `plan-my-day`, `seed-board` | `completable()` argument values (project names, themes) wired to `completion/complete` | + +The two protocol eras differ in how interactive tools converse with the client: on 2025-era connections the server _pushes_ `elicitation/create` / `sampling/createMessage` requests and awaits them inline; on 2026-07-28 it returns `input_required` results and the client retries the call with the answers. The interactive tools (`brainstorm_tasks`, `clear_done`, `prioritize`) implement both arms — branch on `reqCtx.era` to compare them side by side. + +One serving-mode caveat: over **HTTP with a 2025-era client**, `createMcpHandler`'s default stateless posture has no return path for push-style server→client requests, so the sampling/elicitation tools refuse cleanly on that leg (stdio is unaffected; 2026-07-28 HTTP is unaffected). + +## Configuration + +| Env var | Effect | +| ---------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `REQUEST_STATE_SECRET` | HMAC key for the signed `requestState` (≥ 32 bytes). Unset, the server generates a per-process random key — fine whenever a single process serves the whole flow. | +| `PORT` | HTTP port when `--port` isn't passed (default 3000). | + +## Layout + +```text +server.ts transport entry: serveStdio by default, createMcpHandler + node adapter behind --http +todos.ts the application: state, tools, resources, prompts, subscriptions — every feature above +``` + +This package is intentionally **server-only**; its end-to-end coverage comes from the [`cli-client`](../cli-client/README.md) scripted e2e, which drives it across stdio + HTTP on both protocol eras in CI. 
diff --git a/examples/todos-server/package.json b/examples/todos-server/package.json new file mode 100644 index 0000000000..a7798a2abf --- /dev/null +++ b/examples/todos-server/package.json @@ -0,0 +1,22 @@ +{ + "name": "@mcp-examples/todos-server", + "private": true, + "type": "module", + "scripts": { + "start": "tsx server.ts", + "start:http": "tsx server.ts --http" + }, + "dependencies": { + "@mcp-examples/shared": "workspace:*", + "@modelcontextprotocol/node": "workspace:*", + "@modelcontextprotocol/server": "workspace:*", + "zod": "catalog:runtimeShared" + }, + "devDependencies": { + "tsx": "catalog:devTools" + }, + "example": { + "excluded": "server-only package — exercised end-to-end by the examples/cli-client e2e legs", + "shapeExempt": "Reference server, not a single-feature story: no client.ts of its own; the paired host lives in examples/cli-client." + } +} diff --git a/examples/todos-server/server.ts b/examples/todos-server/server.ts new file mode 100644 index 0000000000..72ea371608 --- /dev/null +++ b/examples/todos-server/server.ts @@ -0,0 +1,29 @@ +/** + * Transport entry point for the "todos" reference server (the application itself lives in + * todos.ts). Same dual-transport skeleton as every other example: stdio by default + * (cli-client spawns it as a child process), Streamable HTTP behind `--http`. 
+ */ +import { createServer } from 'node:http'; + +import { parseExampleArgs } from '@mcp-examples/shared'; +import { toNodeHandler } from '@modelcontextprotocol/node'; +import { createMcpHandler } from '@modelcontextprotocol/server'; +import { serveStdio } from '@modelcontextprotocol/server/stdio'; + +import { buildServer, onBoardChanged, onBoardUpdated } from './todos'; + +const { transport, port } = parseExampleArgs(); + +if (transport === 'stdio') { + void serveStdio(buildServer); + console.error('[todos] serving over stdio'); +} else { + const handler = createMcpHandler(buildServer); + // Per-request serving has no connection to push notifications down — cross-request + // events (the board changing) are published through the handler's notifier instead. + onBoardChanged(() => handler.notify.resourcesChanged()); + onBoardUpdated(uri => handler.notify.resourceUpdated(uri)); + createServer(toNodeHandler(handler)).listen(port, () => { + console.error(`[todos] listening on http://127.0.0.1:${port}/mcp`); + }); +} diff --git a/examples/todos-server/todos.ts b/examples/todos-server/todos.ts new file mode 100644 index 0000000000..40220e39e1 --- /dev/null +++ b/examples/todos-server/todos.ts @@ -0,0 +1,713 @@ +/** + * The "todos" demo application — the workload cli-client connects to out of the box. + * + * It is a small but believable application (a project todo board) where every MCP feature has + * a job: CRUD tools the model calls from chat, the board and each task exposed as resources, + * planning/seeding prompts, a sampling-backed `prioritize` tool that borrows the *host's* + * model, elicitation-confirmed `clear_done` and `brainstorm_tasks`, and logging/progress while + * it works. State is in-memory and per-process; the point is the wiring, not the persistence. + * The transport entry point that serves this over stdio / Streamable HTTP is ./server.ts.
+ */ +import type { + CallToolResult, + ElicitRequestFormParams, + InputRequiredResult, + McpRequestContext, + ServerContext +} from '@modelcontextprotocol/server'; +import { + acceptedContent, + completable, + createRequestStateCodec, + inputRequired, + McpServer, + ResourceTemplate +} from '@modelcontextprotocol/server'; +import * as z from 'zod/v4'; + +/** + * The brainstorm_tasks flow as an explicit state machine: each variant names the round the + * handler is waiting on and carries exactly the data the next round needs. The handler + * dispatches on `step`, not on which `inputResponses` key happens to be present. + */ +type BrainstormState = + | { step: 'awaiting-count' } + | { step: 'awaiting-custom-count'; topic: string } + | { step: 'awaiting-ideas'; topic: string; count: number }; + +/** + * HMAC-signs the `requestState` round-tripped through brainstorm_tasks' multi-round flow so a + * client cannot forge or mutate the carried step/theme/count. The seam runs `verify` before the + * handler (rejecting tampered state with -32602); the handler calls `verify` again to decode. + * The key comes from the environment for real deployments and falls back to a per-process + * random one for the zero-setup demo (which is fine because one process serves every round). + */ +const stateCodec = createRequestStateCodec({ + key: process.env.REQUEST_STATE_SECRET ?? crypto.getRandomValues(new Uint8Array(32)) +}); + +/** Read the `action` from a raw elicitation `inputResponses` entry (decline/cancel detection). */ +function elicitAction(response: unknown): 'accept' | 'decline' | 'cancel' { + const action = typeof response === 'object' && response !== null && 'action' in response ? response.action : undefined; + return action === 'accept' || action === 'decline' ? action : 'cancel'; +} + +/** Read the text content from a raw sampling (`createMessage`) `inputResponses` entry. 
*/ +function sampledText(response: unknown): string { + const content = typeof response === 'object' && response !== null && 'content' in response ? response.content : undefined; + return typeof content === 'object' && content !== null && 'type' in content && content.type === 'text' && 'text' in content + ? String(content.text) + : ''; +} + +interface Task { + id: string; + title: string; + project: string; + priority?: 'high' | 'medium' | 'low'; + due?: string; + notes?: string; + status: 'open' | 'done'; +} + +let nextId = 1; +const tasks = new Map<string, Task>(); + +function addTask(task: Omit<Task, 'id' | 'status'>): Task { + const created: Task = { id: `t${nextId++}`, status: 'open', ...task }; + tasks.set(created.id, created); + return created; +} + +function openTasks(): Task[] { + return [...tasks.values()].filter(task => task.status === 'open'); +} + +function projects(): string[] { + return [...new Set([...tasks.values()].map(task => task.project))]; +} + +function describeTask(task: Task): string { + const details = [task.priority && `priority: ${task.priority}`, task.due && `due: ${task.due}`, task.notes].filter(Boolean).join(', '); + return `- [${task.status === 'done' ? 'x' : ' '}] ${task.title} (${task.id}, ${task.project}${details ? `; ${details}` : ''})`; +} + +function renderBoard(): string { + const done = [...tasks.values()].filter(task => task.status === 'done'); + return [ + '# Todo board', + '', + '## Open', + ...openTasks().map(task => describeTask(task)), + '', + '## Done', + ...done.map(task => describeTask(task)) + ].join('\n'); +} + +async function logInfo(ctx: ServerContext, text: string): Promise<void> { + // Request-tied logging: honours the client's logging/setLevel threshold on 2025-era + // connections and the per-request logLevel opt-in on 2026-07-28 connections.
+ await ctx.mcpReq.log('info', text, 'todos'); +} + +/** + * Over per-request HTTP serving, instance-level notifications have nowhere to go — + * cross-request events are published through the handler's notifier instead. The transport + * entry (./server.ts) wires these up for the HTTP branch; over stdio they stay unset and the + * pinned instance's own notifications are routed by the serving entry. + */ +let publishBoardChanged: (() => void) | undefined; +let publishBoardUpdated: ((uri: string) => void) | undefined; + +export function onBoardChanged(publish: () => void): void { + publishBoardChanged = publish; +} + +export function onBoardUpdated(publish: (uri: string) => void): void { + publishBoardUpdated = publish; +} + +async function reportProgress(ctx: ServerContext, progress: number, total: number, message: string): Promise<void> { + const progressToken = ctx.mcpReq._meta?.progressToken; + if (progressToken === undefined) return; + await ctx.mcpReq.notify({ method: 'notifications/progress', params: { progressToken, progress, total, message } }); +} + +const CLEAR_CONFIRM_SCHEMA: ElicitRequestFormParams['requestedSchema'] = { + type: 'object', + properties: { + confirm: { type: 'boolean', title: 'Delete all completed tasks?', description: 'This cannot be undone.'
} + }, + required: ['confirm'] +}; + +const BRAINSTORM_COUNT_SCHEMA: ElicitRequestFormParams['requestedSchema'] = { + type: 'object', + properties: { + theme: { type: 'string', title: 'Theme for the invented tasks', default: "an engineer's week in hell" }, + count: { type: 'string', title: 'How many tasks should I invent?', enum: ['5', '10', '20', '50', 'custom'] } + }, + required: ['count'] +}; + +const BRAINSTORM_CUSTOM_COUNT_SCHEMA: ElicitRequestFormParams['requestedSchema'] = { + type: 'object', + properties: { + customCount: { type: 'integer', title: 'Custom amount', minimum: 1, maximum: 100 } + }, + required: ['customCount'] +}; + +function buildBrainstormSampling(topic: string, wanted: number) { + return { + systemPrompt: + 'You invent short, funny todo items for a given theme. For engineering-flavored themes, lean into in-jokes like ' + + '"Migrate the galactron database to omegastar" or "Ensure the tiddlywinks service speaks gRPC". ' + + 'Reply with one task per line, no numbering, no commentary.', + messages: [ + { + role: 'user' as const, + content: { type: 'text' as const, text: `Invent ${wanted} todo tasks for the theme "${topic}".` } + } + ], + maxTokens: Math.min(200 + wanted * 40, 1500) + }; +} + +/** What the server claims to be doing while it "works through" a task — pure colour for the log stream. */ +const WORK_QUIPS = [ + 'applying percussive maintenance', + 'turning it off and on again', + 'blaming DNS first, investigating second', + 'negotiating with the load balancer', + 'consulting the rubber duck for a second opinion', + 'writing the postmortem in advance to save time', + 'adding a TODO to remove the TODO', + 'rolling back the rollback' +]; + +/** Parse an elicited count value (a preset like "10" or a custom number) into a usable number. */ +function parseBrainstormCount(raw: unknown): number | undefined { + const count = Number.parseInt(String(raw), 10); + return Number.isNaN(count) || count < 1 || count > 100 ? 
undefined : count; +} + +/** Match the LLM's ranking (one title per line) back to tasks; unmentioned tasks keep their order at the end. */ +function applyRanking(rankingText: string, candidates: Task[]): Task[] { + const remaining = [...candidates]; + const ranked: Task[] = []; + for (const line of rankingText.split('\n')) { + const normalized = line.toLowerCase(); + const index = remaining.findIndex(task => normalized.includes(task.title.toLowerCase())); + if (index !== -1) ranked.push(...remaining.splice(index, 1)); + } + return [...ranked, ...remaining]; +} + +function priorityForRank(rank: number, total: number): Task['priority'] { + if (rank < Math.ceil(total / 3)) return 'high'; + if (rank < Math.ceil((2 * total) / 3)) return 'medium'; + return 'low'; +} + +export function buildServer(reqCtx: McpRequestContext): McpServer { + const server = new McpServer( + { name: 'todos', version: '1.0.0' }, + { + capabilities: { logging: {}, resources: { listChanged: true, subscribe: true } }, + requestState: { verify: stateCodec.verify }, + instructions: + 'todos is a small project todo board (it starts empty). Use list_tasks to see the board, add_task / add_tasks and complete_task to ' + + 'change it, prioritize to rank the open tasks, brainstorm_tasks to invent themed example tasks, work_through_tasks to finish every ' + + 'open task with progress updates, and clear_done to remove finished ones (it asks the user for confirmation). The full board is ' + + 'also available as the todos://board resource, and it can be watched/subscribed to for change notifications. 
' + + 'When the user greets you or asks what to try, suggest this tour: 1) ask to brainstorm tasks (the server asks how many — ' + + 'elicitation — then borrows the host model — sampling), 2) ask to prioritize the open tasks (sampling), 3) run the plan-my-day ' + + 'prompt, 4) attach the todos://board resource as context and ask about it, 5) say "do all my tasks" and watch the progress and ' + + 'log notifications, 6) ask to clear completed tasks (an elicitation-confirmed bulk delete). Watching the board resource ' + + '(/watch in cli-client) shows live change notifications along the way.' + } + ); + + // Per-resource subscriptions: 2025-era clients call resources/subscribe (tracked here so + // updates only go to subscribers); 2026-07-28 clients use a subscriptions/listen filter and + // the serving entry routes the same notification onto it. + const subscribedUris = new Set<string>(); + server.server.setRequestHandler('resources/subscribe', request => { + subscribedUris.add(request.params.uri); + return {}; + }); + server.server.setRequestHandler('resources/unsubscribe', request => { + subscribedUris.delete(request.params.uri); + return {}; + }); + + /** Tell connected clients the board changed: the resource list, and the board resource for watchers. */ + const announceBoardChange = async (): Promise<void> => { + await server.sendResourceListChanged(); + if (publishBoardUpdated) { + // Per-request HTTP serving: cross-request delivery goes through the entry's notifier. + publishBoardChanged?.(); + publishBoardUpdated('todos://board'); + } else if (reqCtx.era === 'modern' || subscribedUris.has('todos://board')) { + // stdio: the serving entry routes this onto open listen subscriptions (2026-07-28); + // on 2025-era connections it goes out unsolicited, so only send it to subscribers.
+ await server.server.sendResourceUpdated({ uri: 'todos://board' }).catch(() => {}); + } + }; + + server.registerResource( + 'board', + 'todos://board', + { description: 'The whole todo board as markdown', mimeType: 'text/markdown' }, + async uri => ({ contents: [{ uri: uri.href, mimeType: 'text/markdown', text: renderBoard() }] }) + ); + + server.registerResource( + 'task', + new ResourceTemplate('todos://tasks/{id}', { + list: async () => ({ + resources: [...tasks.values()].map(task => ({ + uri: `todos://tasks/${task.id}`, + name: task.title, + mimeType: 'text/markdown' + })) + }), + complete: { id: value => [...tasks.keys()].filter(id => id.startsWith(value)) } + }), + { description: 'A single task by id', mimeType: 'text/markdown' }, + async (uri, variables) => { + const task = tasks.get(String(variables.id)); + return { + contents: [ + { + uri: uri.href, + mimeType: 'text/markdown', + text: task ? describeTask(task) : `No task with id ${String(variables.id)}` + } + ] + }; + } + ); + + server.registerPrompt( + 'seed-board', + { + description: 'Have the assistant invent themed example tasks and add them to the board (via add_tasks)', + argsSchema: z.object({ + theme: completable(z.string().describe('A theme for the invented tasks'), value => + [ + 'space-station maintenance', + 'wizard tower chores', + 'startup launch week', + "engineer's week in hell", + 'robot uprising prep' + ].filter(theme => theme.startsWith(value)) + ) + }) + }, + async ({ theme }) => ({ + messages: [ + { + role: 'user', + content: { + type: 'text', + text: `Invent five short, funny todo tasks for the theme "${theme}" and add them to my board with the add_tasks tool (use "${theme}" as the project). 
Then show me the board.` + } + } + ] + }) + ); + + server.registerPrompt( + 'plan-my-day', + { + description: 'Seed a planning conversation around the current board', + argsSchema: z.object({ + focus: completable(z.string().describe('Project to focus on'), value => + projects().filter(project => project.startsWith(value)) + ) + }) + }, + async ({ focus }) => ({ + messages: [ + { role: 'user', content: { type: 'text', text: `Here is my current todo board:\n\n${renderBoard()}` } }, + { role: 'assistant', content: { type: 'text', text: 'Got it — I can see your board. What should today look like?' } }, + { + role: 'user', + content: { + type: 'text', + text: `Plan my day around the "${focus}" project: pick at most three tasks, in order, and say why each one is next.` + } + } + ] + }) + ); + + server.registerTool( + 'add_task', + { + description: 'Add a task to the board', + inputSchema: z.object({ + title: z.string().describe('What needs doing'), + project: z.string().optional().describe('Project bucket, e.g. "ops"'), + priority: z.enum(['high', 'medium', 'low']).optional(), + due: z.string().optional().describe('Free-form due date, e.g. "Friday"'), + notes: z.string().optional() + }), + outputSchema: z.object({ id: z.string(), title: z.string(), status: z.enum(['open', 'done']) }) + }, + async ({ title, project, priority, due, notes }, ctx) => { + const task = addTask({ title, project: project ?? 
'inbox', priority, due, notes }); + await announceBoardChange(); + await logInfo(ctx, `added ${task.id}: ${task.title}`); + return { + content: [{ type: 'text', text: `Added ${task.id}: ${describeTask(task)}` }], + structuredContent: { id: task.id, title: task.title, status: task.status } + }; + } + ); + + server.registerTool( + 'add_tasks', + { + description: 'Add several tasks to the board at once', + inputSchema: z.object({ + tasks: z + .array( + z.object({ + title: z.string(), + project: z.string().optional(), + priority: z.enum(['high', 'medium', 'low']).optional(), + due: z.string().optional(), + notes: z.string().optional() + }) + ) + .min(1) + .describe('Tasks to add') + }) + }, + async ({ tasks: newTasks }, ctx) => { + const added: Task[] = []; + for (const [index, task] of newTasks.entries()) { + // Pretend each insert takes a moment so the host has in-flight progress to render. + await new Promise(resolve => setTimeout(resolve, 100)); + added.push(addTask({ ...task, project: task.project ?? 'inbox' })); + await reportProgress(ctx, index + 1, newTasks.length, `added "${task.title}"`); + } + await announceBoardChange(); + await logInfo(ctx, `added ${added.length} task(s)`); + return { + content: [{ type: 'text', text: `Added ${added.length} task(s):\n${added.map(task => describeTask(task)).join('\n')}` }] + }; + } + ); + + server.registerTool( + 'brainstorm_tasks', + { + description: + 'Invent short, funny example tasks for a theme and add them to the board — asks the user how many (elicitation), then has the LLM connected to the host invent them (sampling)', + inputSchema: z.object({ + theme: z.string().optional().describe('Theme for the invented tasks (default: "an engineer\'s week in hell")') + }) + }, + async ({ theme }, ctx): Promise<CallToolResult | InputRequiredResult> => { + // The theme can come from the model (tool argument) or from the user (the elicitation + // form's theme field, pre-filled with a default); the user's answer wins. + const fallbackTopic = theme ??
"an engineer's week in hell"; + const resolveTopic = (raw: unknown): string => (typeof raw === 'string' && raw.trim().length > 0 ? raw.trim() : fallbackTopic); + const countMessage = 'Let me invent some tasks for the board.'; + + const finish = async (ideasText: string, wanted: number, topic: string): Promise<CallToolResult> => { + const titles = ideasText + .split('\n') + .map(line => line.replace(/^\s*(?:[-*]+\s*|\d+[.)]\s+)/, '').trim()) + .filter(line => line.length > 0) + .slice(0, wanted); + if (titles.length === 0) { + return { content: [{ type: 'text', text: 'The model did not return any task ideas.' }], isError: true }; + } + const added = titles.map(title => addTask({ title, project: topic })); + await announceBoardChange(); + await logInfo(ctx, `brainstormed ${added.length} task(s) for "${topic}"`); + return { + content: [ + { + type: 'text', + text: `Added ${added.length} brainstormed task(s):\n${added.map(task => describeTask(task)).join('\n')}` + } + ] + }; + }; + const declined = (action: string): CallToolResult => ({ + content: [{ type: 'text', text: `Nothing added (user answered: ${action}).` }] + }); + + if (reqCtx.era === 'legacy') { + // 2025 era: push-style round trips, awaited inline — one elicitation for the theme + // and count (with a follow-up form only when the user picks "custom"), then the + // host's model invents the tasks (sampling).
+ const countResult = await ctx.mcpReq.elicitInput({ + mode: 'form', + message: countMessage, + requestedSchema: BRAINSTORM_COUNT_SCHEMA + }); + if (countResult.action !== 'accept') return declined(countResult.action); + const topic = resolveTopic(countResult.content?.theme); + let wanted = parseBrainstormCount(countResult.content?.count); + if (countResult.content?.count === 'custom') { + const customResult = await ctx.mcpReq.elicitInput({ + mode: 'form', + message: 'How many exactly?', + requestedSchema: BRAINSTORM_CUSTOM_COUNT_SCHEMA + }); + if (customResult.action !== 'accept') return declined(customResult.action); + wanted = parseBrainstormCount(customResult.content?.customCount); + } + if (wanted === undefined) return declined('cancel'); + const response = await ctx.mcpReq.requestSampling(buildBrainstormSampling(topic, wanted)); + const ideasText = !Array.isArray(response.content) && response.content.type === 'text' ? response.content.text : ''; + return finish(ideasText, wanted, topic); + } + + // 2026-07-28: the same conversation as a multi-round input_required chain. The + // handler is a state machine over BrainstormState — it dispatches on `state.step` + // (not on which inputResponses key arrived), so each round knows exactly which + // answer to read and which data is in scope. State is HMAC-signed by stateCodec; + // the seam already verified integrity before this handler ran, so verify here is + // the decode. + const state: BrainstormState | undefined = + ctx.mcpReq.requestState === undefined ? undefined : await stateCodec.verify(ctx.mcpReq.requestState, ctx); + const askForIdeas = async (count: number, topic: string): Promise<InputRequiredResult> => + inputRequired({ + inputRequests: { ideas: inputRequired.createMessage(buildBrainstormSampling(topic, count)) }, + requestState: await stateCodec.mint({ step: 'awaiting-ideas', topic, count }, ctx) + }); + + switch (state?.step) { + case undefined: { + // First call: ask for the theme and count.
+ return inputRequired({ + inputRequests: { count: inputRequired.elicit({ message: countMessage, requestedSchema: BRAINSTORM_COUNT_SCHEMA }) }, + requestState: await stateCodec.mint({ step: 'awaiting-count' }, ctx) + }); + } + case 'awaiting-count': { + const response = ctx.mcpReq.inputResponses?.['count']; + const accepted = acceptedContent<{ count?: string; theme?: string }>(ctx.mcpReq.inputResponses, 'count'); + if (accepted === undefined) return declined(elicitAction(response)); + const topic = resolveTopic(accepted.theme); + if (accepted.count === 'custom') { + return inputRequired({ + inputRequests: { + customCount: inputRequired.elicit({ + message: 'How many exactly?', + requestedSchema: BRAINSTORM_CUSTOM_COUNT_SCHEMA + }) + }, + requestState: await stateCodec.mint({ step: 'awaiting-custom-count', topic }, ctx) + }); + } + const wanted = parseBrainstormCount(accepted.count); + if (wanted === undefined) return declined('cancel'); + return askForIdeas(wanted, topic); + } + case 'awaiting-custom-count': { + const response = ctx.mcpReq.inputResponses?.['customCount']; + const accepted = acceptedContent<{ customCount?: number }>(ctx.mcpReq.inputResponses, 'customCount'); + const wanted = parseBrainstormCount(accepted?.customCount); + if (wanted === undefined) return declined(elicitAction(response)); + return askForIdeas(wanted, state.topic); + } + case 'awaiting-ideas': { + return finish(sampledText(ctx.mcpReq.inputResponses?.['ideas']), state.count, state.topic); + } + } + } + ); + + server.registerTool( + 'list_tasks', + { + description: 'List tasks on the board', + inputSchema: z.object({ + status: z.enum(['open', 'done', 'all']).optional().describe('Which tasks to list (default: open)'), + project: z.string().optional().describe('Only tasks in this project') + }) + }, + async ({ status, project }) => { + const wanted = status ?? 
'open'; + const matching = [...tasks.values()].filter( + task => (wanted === 'all' || task.status === wanted) && (!project || task.project === project) + ); + return { + content: [ + { + type: 'text', + text: matching.length === 0 ? 'No matching tasks.' : matching.map(task => describeTask(task)).join('\n') + } + ] + }; + } + ); + + server.registerTool( + 'complete_task', + { + description: 'Mark a task as done', + inputSchema: z.object({ task: z.string().describe('Task id, or part of its title') }) + }, + async ({ task: query }, ctx) => { + const needle = query.toLowerCase(); + const task = tasks.get(query) ?? [...tasks.values()].find(candidate => candidate.title.toLowerCase().includes(needle)); + if (!task) { + return { content: [{ type: 'text', text: `No task matches "${query}".` }], isError: true }; + } + task.status = 'done'; + await announceBoardChange(); + await logInfo(ctx, `completed ${task.id}: ${task.title}`); + return { content: [{ type: 'text', text: `Marked "${task.title}" (${task.id}) as done.` }] }; + } + ); + + server.registerTool( + 'work_through_tasks', + { + description: + 'Work through every open task one by one (simulated, a few seconds each), logging what it is "doing", reporting progress, and marking each as done', + inputSchema: z.object({ + secondsPerTask: z.number().min(0).max(15).optional().describe('How long to pretend each task takes (default: 3 seconds)') + }) + }, + async ({ secondsPerTask }, ctx) => { + const queue = openTasks(); + if (queue.length === 0) { + return { content: [{ type: 'text', text: 'Nothing open — the board is already clear.' }] }; + } + const paceMs = (secondsPerTask ?? 3) * 1000; + for (const [index, task] of queue.entries()) { + // Honour cancellation: if the client aborted the call (notifications/cancelled), + // stop early instead of ploughing through the rest of the queue. 
+ if (ctx.mcpReq.signal.aborted) { + return { + content: [ + { type: 'text', text: `Stopped early — the request was cancelled after ${index} of ${queue.length} task(s).` } + ] + }; + } + // Narrate the "work" (a log notification per task), pretend it takes a moment so the + // host has live progress to render, then announce the board change for watchers. + await logInfo(ctx, `working on "${task.title}" — ${WORK_QUIPS[index % WORK_QUIPS.length] ?? 'working'}…`); + await new Promise(resolve => setTimeout(resolve, paceMs)); + task.status = 'done'; + await reportProgress(ctx, index + 1, queue.length, `finished "${task.title}"`); + await announceBoardChange(); + } + await logInfo(ctx, `worked through ${queue.length} open task(s)`); + return { + content: [ + { type: 'text', text: `Worked through ${queue.length} task(s):\n${queue.map(task => `- ${task.title} ✔`).join('\n')}` } + ] + }; + } + ); + + server.registerTool( + 'clear_done', + { description: 'Delete every completed task (asks the user to confirm first)' }, + async (ctx): Promise<CallToolResult | InputRequiredResult> => { + const done = [...tasks.values()].filter(task => task.status === 'done'); + if (done.length === 0) return { content: [{ type: 'text', text: 'No completed tasks to clear.' }] }; + const message = `Delete ${done.length} completed task(s) from the board?`; + + let action: string; + let confirmation: { confirm?: boolean } | undefined; + if (reqCtx.era === 'legacy') { + // 2025 era: a push-style elicitation/create request, answered inline. + const result = await ctx.mcpReq.elicitInput({ mode: 'form', message, requestedSchema: CLEAR_CONFIRM_SCHEMA }); + action = result.action; + confirmation = result.action === 'accept' && result.content ? { confirm: result.content.confirm === true } : undefined; + } else { + // 2026-07-28: a single input_required round, so no requestState is needed — + // the first call has no inputResponses and returns the question; the re-call + // carries the answer.
(For multi-round flows, dispatch on a discriminated + // requestState instead — see brainstorm_tasks.) + const response = ctx.mcpReq.inputResponses?.['confirmation']; + if (response === undefined) { + return inputRequired({ + inputRequests: { confirmation: inputRequired.elicit({ message, requestedSchema: CLEAR_CONFIRM_SCHEMA }) } + }); + } + action = elicitAction(response); + confirmation = acceptedContent<{ confirm?: boolean }>(ctx.mcpReq.inputResponses, 'confirmation'); + } + + if (confirmation?.confirm !== true) { + // Decline and cancel are answers — report them and stop, never ask again. + return { content: [{ type: 'text', text: `Nothing deleted (user answered: ${action}).` }] }; + } + for (const task of done) tasks.delete(task.id); + await announceBoardChange(); + await logInfo(ctx, `cleared ${done.length} completed task(s)`); + return { content: [{ type: 'text', text: `Deleted ${done.length} completed task(s).` }] }; + } + ); + + server.registerTool( + 'prioritize', + { description: 'Rank the open tasks by importance using the LLM connected to the host, and update their priorities' }, + async (ctx): Promise<CallToolResult | InputRequiredResult> => { + const candidates = openTasks(); + if (candidates.length === 0) return { content: [{ type: 'text', text: 'No open tasks to prioritize.' }] }; + const samplingRequest = { + systemPrompt: 'You prioritize todo lists. Reply with one task title per line, most important first. No commentary.', + messages: [ + { + role: 'user' as const, + content: { + type: 'text' as const, + text: `Rank these tasks:\n${candidates.map(task => `- ${task.title}`).join('\n')}` + } + } + ], + maxTokens: 400 + }; + + let rankingText: string; + if (reqCtx.era === 'legacy') { + // 2025 era: push-style sampling/createMessage back to the client, awaited inline. + const response = await ctx.mcpReq.requestSampling(samplingRequest); + rankingText = !Array.isArray(response.content) && response.content.type === 'text' ?
response.content.text : ''; + } else { + // 2026-07-28: a single input_required round (the ranking arrives on the retried + // call), so no requestState is needed. For multi-round flows, dispatch on a + // discriminated requestState instead — see brainstorm_tasks. + const response = ctx.mcpReq.inputResponses?.['ranking']; + if (response === undefined) { + return inputRequired({ inputRequests: { ranking: inputRequired.createMessage(samplingRequest) } }); + } + rankingText = sampledText(response); + } + + const ranked = applyRanking(rankingText, candidates); + for (const [index, task] of ranked.entries()) { + task.priority = priorityForRank(index, ranked.length); + } + // Priorities are board-visible state — watchers and list caches must hear about it. + await announceBoardChange(); + await logInfo(ctx, `prioritize: ranked ${ranked.length} open task(s) via the host LLM`); + return { + content: [ + { + type: 'text', + text: `Re-prioritized ${ranked.length} task(s):\n${ranked.map(task => `- ${task.title} → ${task.priority}`).join('\n')}` + } + ] + }; + } + ); + + return server; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index fa3afea05a..8df5ee1608 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -404,6 +404,37 @@ importers: specifier: catalog:devTools version: 4.21.0 + examples/cli-client: + dependencies: + '@anthropic-ai/sdk': + specifier: ^0.74.0 + version: 0.74.0(zod@4.3.6) + '@google/genai': + specifier: ^1.0.0 + version: 1.52.0(@modelcontextprotocol/sdk@1.29.0(@cfworker/json-schema@4.1.1)(zod@4.3.6)) + '@mcp-examples/shared': + specifier: workspace:* + version: link:../shared + '@modelcontextprotocol/client': + specifier: workspace:* + version: link:../../packages/client + open: + specifier: ^11.0.0 + version: 11.0.0 + openai: + specifier: ^6.0.0 + version: 6.44.0(ws@8.18.0)(zod@4.3.6) + zod: + specifier: catalog:runtimeShared + version: 4.3.6 + devDependencies: + tsx: + specifier: catalog:devTools + version: 4.21.0 + vitest: + specifier: catalog:devTools 
+ version: 4.1.2(@opentelemetry/api@1.9.1)(@types/node@25.5.0)(vite@7.3.0(@types/node@25.5.0)(tsx@4.21.0)(yaml@2.8.3)) + examples/client-quickstart: dependencies: '@anthropic-ai/sdk': @@ -1103,6 +1134,25 @@ importers: specifier: catalog:devTools version: 4.21.0 + examples/todos-server: + dependencies: + '@mcp-examples/shared': + specifier: workspace:* + version: link:../shared + '@modelcontextprotocol/node': + specifier: workspace:* + version: link:../../packages/middleware/node + '@modelcontextprotocol/server': + specifier: workspace:* + version: link:../../packages/server + zod: + specifier: catalog:runtimeShared + version: 4.3.6 + devDependencies: + tsx: + specifier: catalog:devTools + version: 4.21.0 + examples/tools: dependencies: '@mcp-examples/shared': @@ -2571,6 +2621,15 @@ packages: '@gerrit0/mini-shiki@3.23.0': resolution: {integrity: sha512-bEMORlG0cqdjVyCEuU0cDQbORWX+kYCeo0kV1lbxF5bt4r7SID2l9bqsxJEM0zndaxpOUT7riCyIVEuqq/Ynxg==} + '@google/genai@1.52.0': + resolution: {integrity: sha512-gwSvbpiN/17O9TbsqSsE/OzZcpv5Fo4RQjdngGgogtuB9RsyJ8ZHhX5KjHj1bp5N9snN2eK8LDGXSaWW2hof8Q==} + engines: {node: '>=20.0.0'} + peerDependencies: + '@modelcontextprotocol/sdk': ^1.25.2 + peerDependenciesMeta: + '@modelcontextprotocol/sdk': + optional: true + '@hono/node-server@1.19.11': resolution: {integrity: sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g==} engines: {node: '>=18.14.1'} @@ -2902,6 +2961,33 @@ packages: '@poppinss/exception@1.2.3': resolution: {integrity: sha512-dCED+QRChTVatE9ibtoaxc+WkdzOSjYTKi/+uacHWIsfodVfpsueo3+DKpgU5Px8qXjgmXkSvhXvSCz3fnP9lw==} + '@protobufjs/aspromise@1.1.2': + resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==} + + '@protobufjs/base64@1.1.2': + resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==} + + '@protobufjs/codegen@2.0.5': + resolution: {integrity: 
sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==} + + '@protobufjs/eventemitter@1.1.1': + resolution: {integrity: sha512-vW1GmwMZNnL+gMRaovlh9yZX74kc+TTU3FObkkurpMaRtBfLP3ldjS9KQWlwZgraRE0+dheEEoAxdzcJQ8eXZg==} + + '@protobufjs/fetch@1.1.1': + resolution: {integrity: sha512-GpptLrs57adMSuHi3VNj0mAF8dwh36LMaYF6XyJ6JMWlVsc+t42tm1HSEDmOs3A8fC9yyeisgLhsTVQokOZ0zw==} + + '@protobufjs/float@1.0.2': + resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==} + + '@protobufjs/path@1.1.2': + resolution: {integrity: sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==} + + '@protobufjs/pool@1.1.0': + resolution: {integrity: sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==} + + '@protobufjs/utf8@1.1.1': + resolution: {integrity: sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==} + '@quansync/fs@1.0.0': resolution: {integrity: sha512-4TJ3DFtlf1L5LDMaM6CanJ/0lckGNtJcMjQ1NAV6zDmA0tEHKZtxNKin8EgPaVX1YzljbxckyT2tJrpQKAtngQ==} @@ -3328,6 +3414,9 @@ packages: '@types/range-parser@1.2.7': resolution: {integrity: sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==} + '@types/retry@0.12.0': + resolution: {integrity: sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==} + '@types/send@1.2.1': resolution: {integrity: sha512-arsCikDvlU99zl1g69TcAB3mzZPpxgw0UQnaHeC1Nwb015xp8bknZv5rIfri9xTOcMuaVgvabfIRA7PSZVuZIQ==} @@ -3595,6 +3684,10 @@ packages: engines: {node: '>=0.4.0'} hasBin: true + agent-base@7.1.4: + resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} + engines: {node: '>= 14'} + ajv-formats@3.0.1: resolution: {integrity: 
sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} peerDependencies: @@ -3790,6 +3883,9 @@ packages: resolution: {integrity: sha512-RxD2Vd96sQDjQr20kdP+F+dK/1OUNiVOl200vKBZY8u0vTwysfolF6Hq+3ZK2+h8My9YvZhHsF+RSGZW2VYrPQ==} engines: {node: 20.x || 22.x || 23.x || 24.x || 25.x} + bignumber.js@9.3.1: + resolution: {integrity: sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==} + bindings@1.5.0: resolution: {integrity: sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==} @@ -3822,6 +3918,9 @@ packages: engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} hasBin: true + buffer-equal-constant-time@1.0.1: + resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==} + buffer@5.7.1: resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==} @@ -3950,6 +4049,10 @@ packages: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} + data-uri-to-buffer@4.0.1: + resolution: {integrity: sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==} + engines: {node: '>= 12'} + data-view-buffer@1.0.2: resolution: {integrity: sha512-EmKO5V3OLXh1rtK2wgXRansaK1/mtVdTUEiEI0W8RkvgT05kfxaH29PliLnpLP73yYO6142Q72QNa8Wx/A5CqQ==} engines: {node: '>= 0.4'} @@ -4064,6 +4167,9 @@ packages: resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} engines: {node: '>= 0.4'} + ecdsa-sig-formatter@1.0.11: + resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==} + ee-first@1.1.1: resolution: {integrity: 
sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==} @@ -4334,6 +4440,9 @@ packages: resolution: {integrity: sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==} engines: {node: '>= 18'} + extend@3.0.2: + resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} + extendable-error@0.1.7: resolution: {integrity: sha512-UOiS2in6/Q0FK0R0q6UY9vYpQ21mr/Qn1KOnte7vsACuNJf514WvCCUHSRCPcgjPT2bAhNIJdlE6bVap1GKmeg==} @@ -4383,6 +4492,10 @@ packages: picomatch: optional: true + fetch-blob@3.2.0: + resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==} + engines: {node: ^12.20 || >= 14.13} + file-entry-cache@8.0.0: resolution: {integrity: sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ==} engines: {node: '>=16.0.0'} @@ -4429,6 +4542,10 @@ packages: resolution: {integrity: sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==} engines: {node: '>= 6'} + formdata-polyfill@4.0.10: + resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==} + engines: {node: '>=12.20.0'} + formidable@3.5.4: resolution: {integrity: sha512-YikH+7CUTOtP44ZTnUhR7Ic2UASBPOqmaRkRKxRbywPTe5VxF7RRCck4af9wutiZ/QKM5nME9Bie2fFaPz5Gug==} engines: {node: '>=14.0.0'} @@ -4467,6 +4584,14 @@ packages: functions-have-names@1.2.3: resolution: {integrity: sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ==} + gaxios@7.1.5: + resolution: {integrity: sha512-5FZy72Rh8LhtjmvDrKkI+lVhrsQrVKVsItxMoDm5mNQE+xR0WVIIs+jzPSJgBvKVsLi24fZhXJIsNI0bihDzFg==} + engines: {node: '>=18'} + + gcp-metadata@8.1.2: + resolution: {integrity: sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==} 
+ engines: {node: '>=18'} + generator-function@2.0.1: resolution: {integrity: sha512-SFdFmIJi+ybC0vjlHN0ZGVGHc3lgE0DxPAT0djjVg+kjOnSqclqmj0KQ7ykTOLP6YxoqOvuAODGdcHJn+43q3g==} engines: {node: '>= 0.4'} @@ -4520,6 +4645,14 @@ packages: globrex@0.1.2: resolution: {integrity: sha512-uHJgbwAMwNFf5mLst7IWLNg14x1CkeqglJb/K3doi4dw6q2IvAAmM/Y81kevy83wP+Sst+nutFTYOGg3d1lsxg==} + google-auth-library@10.7.0: + resolution: {integrity: sha512-QpTAbNJ36TliZLx3TTtahR8HG0hN9RllL1e3FymOvQSIKK8JmgV58H924ub2wa2DsS3ANjjP1Aw1N+Ramc8hqQ==} + engines: {node: '>=18'} + + google-logging-utils@1.1.3: + resolution: {integrity: sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==} + engines: {node: '>=14'} + gopd@1.2.0: resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} engines: {node: '>= 0.4'} @@ -4565,6 +4698,10 @@ packages: resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==} engines: {node: '>= 0.8'} + https-proxy-agent@7.0.6: + resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} + engines: {node: '>= 14'} + human-id@4.1.3: resolution: {integrity: sha512-tsYlhAYpjCKa//8rXZ9DqKEawhPoSytweBC2eNvcaDK+57RZLHGqNs3PZTQO6yekLFSuvA6AlnAfrw1uBvtb+Q==} hasBin: true @@ -4784,6 +4921,9 @@ packages: engines: {node: '>=6'} hasBin: true + json-bigint@1.0.0: + resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} + json-buffer@3.0.1: resolution: {integrity: sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==} @@ -4816,6 +4956,12 @@ packages: jsonfile@4.0.0: resolution: {integrity: sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==} + jwa@2.0.1: + resolution: {integrity: 
sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==} + + jws@4.0.1: + resolution: {integrity: sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==} + keyv@4.5.4: resolution: {integrity: sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==} @@ -4905,6 +5051,9 @@ packages: lodash.startcase@4.4.0: resolution: {integrity: sha512-+WKqsK294HMSc2jEbNgpHpd0JfIBhp7rEV4aqXWqFr6AlXov+SlcgB1Fv01y2kGe3Gc8nMW7VA0SrGuSkRfIEg==} + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + lunr@2.3.9: resolution: {integrity: sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow==} @@ -5020,6 +5169,11 @@ packages: resolution: {integrity: sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==} engines: {node: '>=10'} + node-domexception@1.0.0: + resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} + engines: {node: '>=10.5.0'} + deprecated: Use your platform's native DOMException instead + node-fetch@2.7.0: resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} engines: {node: 4.x || >=6.0.0} @@ -5029,6 +5183,10 @@ packages: encoding: optional: true + node-fetch@3.3.2: + resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + node-releases@2.0.36: resolution: {integrity: sha512-TdC8FSgHz8Mwtw9g5L4gR/Sh9XhSP/0DEkQxfEFXOpiul5IiHgHan2VhYYb6agDSfp4KuvltmGApc8HMgUrIkA==} @@ -5078,6 +5236,17 @@ packages: resolution: {integrity: sha512-smsWv2LzFjP03xmvFoJ331ss6h+jixfA4UUV/Bsiyuu4YJPfN+FIQGOIiv4w9/+MoHkfkJ22UIaQWRVFRfH6Vw==} engines: {node: '>=20'} + 
openai@6.44.0: + resolution: {integrity: sha512-09/gH+8jH0RgUwsgWHAaxsKGRT5zVZ95IaJUnqAWj6XejIBmnFRwq2WUIF37VtDEsmGrtPmvCs5+yBSeZGWvkA==} + peerDependencies: + ws: ^8.18.0 + zod: ^3.25 || ^4.0 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + optionator@0.9.4: resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==} engines: {node: '>= 0.8.0'} @@ -5113,6 +5282,10 @@ packages: resolution: {integrity: sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw==} engines: {node: '>=6'} + p-retry@4.6.2: + resolution: {integrity: sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==} + engines: {node: '>=8'} + p-try@2.2.0: resolution: {integrity: sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==} engines: {node: '>=6'} @@ -5226,6 +5399,10 @@ packages: process-warning@5.0.0: resolution: {integrity: sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA==} + protobufjs@7.6.4: + resolution: {integrity: sha512-RJJPTTpvFfHcWLkIa2JFWK4XvtSzS0yEWDmunqHXli1h3JlkbcQZXDZdcWxv+JK3Xsl5/UFDPZ0iGm7DAengYw==} + engines: {node: '>=12.0.0'} + proxy-addr@2.0.7: resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==} engines: {node: '>= 0.10'} @@ -5321,6 +5498,10 @@ packages: resolution: {integrity: sha512-I1XxrZSQ+oErkRR4jYbAyEEu2I0avBvvMM5JN+6EBprOGRCs63ENqZ3vjavq8fBw2+62G5LF5XelKwuJpcvcxw==} engines: {node: '>=10'} + retry@0.13.1: + resolution: {integrity: sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==} + engines: {node: '>= 4'} + reusify@1.1.0: resolution: {integrity: sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==} engines: {iojs: '>=1.0.0', node: '>=0.10.0'} @@ -5898,6 +6079,10 
@@ packages: jsdom: optional: true + web-streams-polyfill@3.3.3: + resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} + engines: {node: '>= 8'} + webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} @@ -6514,6 +6699,19 @@ snapshots: '@shikijs/types': 3.23.0 '@shikijs/vscode-textmate': 10.0.2 + '@google/genai@1.52.0(@modelcontextprotocol/sdk@1.29.0(@cfworker/json-schema@4.1.1)(zod@4.3.6))': + dependencies: + google-auth-library: 10.7.0 + p-retry: 4.6.2 + protobufjs: 7.6.4 + ws: 8.18.0 + optionalDependencies: + '@modelcontextprotocol/sdk': 1.29.0(@cfworker/json-schema@4.1.1)(zod@4.3.6) + transitivePeerDependencies: + - bufferutil + - supports-color + - utf-8-validate + '@hono/node-server@1.19.11(hono@4.12.9)': dependencies: hono: 4.12.9 @@ -6827,6 +7025,26 @@ snapshots: '@poppinss/exception@1.2.3': {} + '@protobufjs/aspromise@1.1.2': {} + + '@protobufjs/base64@1.1.2': {} + + '@protobufjs/codegen@2.0.5': {} + + '@protobufjs/eventemitter@1.1.1': {} + + '@protobufjs/fetch@1.1.1': + dependencies: + '@protobufjs/aspromise': 1.1.2 + + '@protobufjs/float@1.0.2': {} + + '@protobufjs/path@1.1.2': {} + + '@protobufjs/pool@1.1.0': {} + + '@protobufjs/utf8@1.1.1': {} + '@quansync/fs@1.0.0': dependencies: quansync: 1.0.0 @@ -7113,6 +7331,8 @@ snapshots: '@types/range-parser@1.2.7': {} + '@types/retry@0.12.0': {} + '@types/send@1.2.1': dependencies: '@types/node': 24.12.0 @@ -7383,6 +7603,8 @@ snapshots: acorn@8.16.0: {} + agent-base@7.1.4: {} + ajv-formats@3.0.1(ajv@8.18.0): optionalDependencies: ajv: 8.18.0 @@ -7557,6 +7779,8 @@ snapshots: bindings: 1.5.0 prebuild-install: 7.1.3 + bignumber.js@9.3.1: {} + bindings@1.5.0: dependencies: file-uri-to-path: 1.0.0 @@ -7606,6 +7830,8 @@ snapshots: node-releases: 2.0.36 update-browserslist-db: 1.2.3(browserslist@4.28.1) + buffer-equal-constant-time@1.0.1: {} + buffer@5.7.1: 
dependencies: base64-js: 1.5.1 @@ -7710,6 +7936,8 @@ snapshots: shebang-command: 2.0.0 which: 2.0.2 + data-uri-to-buffer@4.0.1: {} + data-view-buffer@1.0.2: dependencies: call-bound: 1.0.4 @@ -7802,6 +8030,10 @@ snapshots: es-errors: 1.3.0 gopd: 1.2.0 + ecdsa-sig-formatter@1.0.11: + dependencies: + safe-buffer: 5.2.1 + ee-first@1.1.1: {} electron-to-chromium@1.5.328: {} @@ -8236,6 +8468,8 @@ snapshots: transitivePeerDependencies: - supports-color + extend@3.0.2: {} + extendable-error@0.1.7: {} fast-content-type-parse@3.0.0: {} @@ -8299,6 +8533,11 @@ snapshots: optionalDependencies: picomatch: 4.0.4 + fetch-blob@3.2.0: + dependencies: + node-domexception: 1.0.0 + web-streams-polyfill: 3.3.3 + file-entry-cache@8.0.0: dependencies: flat-cache: 4.0.1 @@ -8357,6 +8596,10 @@ snapshots: hasown: 2.0.2 mime-types: 2.1.35 + formdata-polyfill@4.0.10: + dependencies: + fetch-blob: 3.2.0 + formidable@3.5.4: dependencies: '@paralleldrive/cuid2': 2.3.1 @@ -8397,6 +8640,22 @@ snapshots: functions-have-names@1.2.3: {} + gaxios@7.1.5: + dependencies: + extend: 3.0.2 + https-proxy-agent: 7.0.6 + node-fetch: 3.3.2 + transitivePeerDependencies: + - supports-color + + gcp-metadata@8.1.2: + dependencies: + gaxios: 7.1.5 + google-logging-utils: 1.1.3 + json-bigint: 1.0.0 + transitivePeerDependencies: + - supports-color + generator-function@2.0.1: {} get-intrinsic@1.3.0: @@ -8459,6 +8718,19 @@ snapshots: globrex@0.1.2: {} + google-auth-library@10.7.0: + dependencies: + base64-js: 1.5.1 + ecdsa-sig-formatter: 1.0.11 + gaxios: 7.1.5 + gcp-metadata: 8.1.2 + google-logging-utils: 1.1.3 + jws: 4.0.1 + transitivePeerDependencies: + - supports-color + + google-logging-utils@1.1.3: {} + gopd@1.2.0: {} graceful-fs@4.2.11: {} @@ -8497,6 +8769,13 @@ snapshots: statuses: 2.0.2 toidentifier: 1.0.1 + https-proxy-agent@7.0.6: + dependencies: + agent-base: 7.1.4 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + human-id@4.1.3: {} iconv-lite@0.6.3: @@ -8694,6 +8973,10 @@ snapshots: 
jsesc@3.1.0: {} + json-bigint@1.0.0: + dependencies: + bignumber.js: 9.3.1 + json-buffer@3.0.1: {} json-schema-ref-resolver@3.0.0: @@ -8723,6 +9006,17 @@ snapshots: optionalDependencies: graceful-fs: 4.2.11 + jwa@2.0.1: + dependencies: + buffer-equal-constant-time: 1.0.1 + ecdsa-sig-formatter: 1.0.11 + safe-buffer: 5.2.1 + + jws@4.0.1: + dependencies: + jwa: 2.0.1 + safe-buffer: 5.2.1 + keyv@4.5.4: dependencies: json-buffer: 3.0.1 @@ -8801,6 +9095,8 @@ snapshots: lodash.startcase@4.4.0: {} + long@5.3.2: {} + lunr@2.3.9: {} magic-string@0.30.21: @@ -8893,12 +9189,20 @@ snapshots: dependencies: semver: 7.7.4 + node-domexception@1.0.0: {} + node-fetch@2.7.0(encoding@0.1.13): dependencies: whatwg-url: 5.0.0 optionalDependencies: encoding: 0.1.13 + node-fetch@3.3.2: + dependencies: + data-uri-to-buffer: 4.0.1 + fetch-blob: 3.2.0 + formdata-polyfill: 4.0.10 + node-releases@2.0.36: {} object-assign@4.1.1: {} @@ -8957,6 +9261,11 @@ snapshots: powershell-utils: 0.1.0 wsl-utils: 0.3.1 + openai@6.44.0(ws@8.18.0)(zod@4.3.6): + optionalDependencies: + ws: 8.18.0 + zod: 4.3.6 + optionator@0.9.4: dependencies: deep-is: 0.1.4 @@ -8996,6 +9305,11 @@ snapshots: p-map@2.1.0: {} + p-retry@4.6.2: + dependencies: + '@types/retry': 0.12.0 + retry: 0.13.1 + p-try@2.2.0: {} package-manager-detector@0.2.11: @@ -9091,6 +9405,20 @@ snapshots: process-warning@5.0.0: {} + protobufjs@7.6.4: + dependencies: + '@protobufjs/aspromise': 1.1.2 + '@protobufjs/base64': 1.1.2 + '@protobufjs/codegen': 2.0.5 + '@protobufjs/eventemitter': 1.1.1 + '@protobufjs/fetch': 1.1.1 + '@protobufjs/float': 1.0.2 + '@protobufjs/path': 1.1.2 + '@protobufjs/pool': 1.1.0 + '@protobufjs/utf8': 1.1.1 + '@types/node': 24.12.0 + long: 5.3.2 + proxy-addr@2.0.7: dependencies: forwarded: 0.2.0 @@ -9190,6 +9518,8 @@ snapshots: ret@0.5.0: {} + retry@0.13.1: {} + reusify@1.1.0: {} rfdc@1.4.1: {} @@ -9916,6 +10246,8 @@ snapshots: transitivePeerDependencies: - msw + web-streams-polyfill@3.3.3: {} + webidl-conversions@3.0.1: {} 
whatwg-url@5.0.0: From 4565885063f7f67a9e3c77d1d6509fcd6bd77f94 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Mon, 29 Jun 2026 11:28:44 +0000 Subject: [PATCH 2/5] fix(examples,docs): gate URL-mode elicitation like OAuth; document tool-consent policy - URL-mode elicitation now applies the same https-or-loopback check as the OAuth flow before offering to open a server-supplied URL (file:, javascript:, and plain-http phishing URLs fail closed to a decline). The check is a shared isSafeBrowserUrl helper, unit-tested, and the guide's URL-mode section now states the gate. - The host-integration guide's security section gains a tool-consent bullet: the spec expects a human in the loop able to deny tool invocations; cli-client auto-executes because an interactive user watches every call, and a short comment at the execution site says an unattended host must gate execution on user consent. - packages/codemod/src/generated/versions.ts is regenerated: the @modelcontextprotocol package ranges move from ^2.0.0-alpha.2 to ^2.0.0-alpha.3 (core from ^2.0.0-alpha.0 to ^2.0.0-alpha.1). --- docs/host-integration.md | 6 +++++- examples/cli-client/host/auth.ts | 11 ++++++++++- examples/cli-client/host/host.ts | 15 ++++++++++++++- examples/cli-client/host/loop.ts | 3 +++ examples/cli-client/test/auth.test.ts | 17 +++++++++++++++++ packages/codemod/src/generated/versions.ts | 12 ++++++------ 6 files changed, 55 insertions(+), 9 deletions(-) diff --git a/docs/host-integration.md b/docs/host-integration.md index 4277b0af4a..db5c6af231 100644 --- a/docs/host-integration.md +++ b/docs/host-integration.md @@ -137,6 +137,9 @@ export async function runModelRounds(session: ChatSession): Promise { if (result.text) ui.print(result.text); if (result.toolCalls.length === 0) return; + // cli-client executes tool calls without a confirmation gate because an interactive + // user watches every `→` line and holds Ctrl-C; a host without that live supervision + // must gate execution on user consent (see the guide's security section).
for (const call of result.toolCalls) { ui.status(`→ ${call.name} ${JSON.stringify(call.arguments)}`); // Long-running calls stay cancellable: Ctrl-C aborts this call (the SDK sends @@ -240,7 +243,7 @@ Declare the capability in the client constructor and register the handler once; Elicitation is the inverse of sampling: the server needs _the human_, not the model. Two modes arrive at the same handler: - **Form mode** carries `message` plus a flat `requestedSchema` (strings, numbers, booleans, enums — no nesting). Generate UI from it: cli-client walks the properties and asks one question per field in the terminal (`host/ui.ts`), validating against the declared type before accepting. -- **URL mode** carries a URL the user must visit (payment, OAuth-style consent, anything that should not pass through the host). Show it, let the user open it, confirm when done. +- **URL mode** carries a URL the user must visit (payment, OAuth-style consent, anything that should not pass through the host). Apply the same https-or-loopback gate as OAuth before offering it, then show it, let the user open it, confirm when done. Return exactly one of the three outcomes and mean it: `accept` (with the collected content), `decline` (the user said no), `cancel` (the user dismissed it). Decline and cancel are answers, not retries — a server that re-asks on decline is a bug, and a host that maps errors to `accept` is a worse one. cli-client fails closed: any error in form collection becomes `cancel`. @@ -280,6 +283,7 @@ Patterns worth knowing about once the basics work — none of them are in cli-cl A host sits between untrusted servers, a user's credentials, and a model that does what its context tells it. The short list cli-client implements and the guide above assumes: - Treat every server-provided string as untrusted input: strip terminal escape sequences before rendering, label injected content with its origin, and cap its size. 
+- Decide a tool-consent policy: the spec expects a human in the loop able to deny tool invocations, so confirm destructive or side-effecting calls (or maintain a per-server allowlist). cli-client auto-executes because an interactive user watches every call and can Ctrl-C; an unattended host must not. - Gate sampling on explicit user approval and cap its token spend; gate browser-opening (OAuth, URL elicitation) the same way. - Never hand a child server process your full environment, and keep API keys out of config files (`${VAR}` interpolation exists for this). - Validate the OAuth `state` parameter, only hand `https:` (or loopback) authorization URLs to the browser, and never render attacker-controllable error descriptions from callbacks. diff --git a/examples/cli-client/host/auth.ts b/examples/cli-client/host/auth.ts index d8e081ecf1..6bc3fbd6aa 100644 --- a/examples/cli-client/host/auth.ts +++ b/examples/cli-client/host/auth.ts @@ -136,6 +136,15 @@ function isLoopbackHost(hostname: string): boolean { return hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '[::1]' || hostname === '::1'; } +/** + * True when a server-supplied URL is safe to hand to the system browser: `https:`, or `http:` + * on a loopback host. Everything else (`file:`, `javascript:`, plain http to a remote host) + * fails closed. Shared by the OAuth flow and URL-mode elicitation. 
+ */ +export function isSafeBrowserUrl(url: URL): boolean { + return url.protocol === 'https:' || (url.protocol === 'http:' && isLoopbackHost(url.hostname)); +} + /** * Complete an interactive OAuth flow after `connect()` failed with `UnauthorizedError`: * confirm with the user, open the system browser, wait for the loopback callback, verify @@ -157,7 +166,7 @@ export async function completeAuthorizationWithBrowser(options: { if (!authorizationUrl) return false; // The authorization endpoint comes from server-controlled discovery metadata — never hand // a non-https (or non-loopback) URL to the browser, and show the user where they're going. - if (authorizationUrl.protocol !== 'https:' && !(authorizationUrl.protocol === 'http:' && isLoopbackHost(authorizationUrl.hostname))) { + if (!isSafeBrowserUrl(authorizationUrl)) { ui.status(`skipping "${serverName}" — refusing to open non-https authorization URL (${authorizationUrl.origin})`); return false; } diff --git a/examples/cli-client/host/host.ts b/examples/cli-client/host/host.ts index d11ba5e0c5..5897785be7 100644 --- a/examples/cli-client/host/host.ts +++ b/examples/cli-client/host/host.ts @@ -25,7 +25,7 @@ import { StdioClientTransport } from '@modelcontextprotocol/client/stdio'; import type { ChatMessage, ContentPart, GenerateResult, LLMProvider, ToolCall, ToolDefinition } from '../providers/provider'; import { isRecord } from '../providers/provider'; -import { completeAuthorizationWithBrowser, createOAuthProvider, findCallbackPort } from './auth'; +import { completeAuthorizationWithBrowser, createOAuthProvider, findCallbackPort, isSafeBrowserUrl } from './auth'; import type { CliClientConfig, ServerConfig } from './config'; import { isHttpServer } from './config'; import { contentBlockToParts, resourceToContextText, toolResultToParts } from './content'; @@ -405,6 +405,19 @@ export class McpHost { client.setRequestHandler('elicitation/create', async (request): Promise => { const params = request.params; if 
(params.mode === 'url') { + // Same discipline as the OAuth path: never offer a server-controlled URL to the + // browser unless it is https (or http on loopback) — file:, javascript:, and + // plain-http phishing URLs all fail closed to a decline. + let target: URL | undefined; + try { + target = new URL(params.url); + } catch { + target = undefined; + } + if (!target || !isSafeBrowserUrl(target)) { + this.ui.status(`declined URL elicitation from "${name}" — refusing to open a non-https URL`); + return { action: 'decline' }; + } this.ui.attention( `[elicitation request]\nServer "${name}" needs you to complete a step in the browser:\n\n${params.url}\n` ); diff --git a/examples/cli-client/host/loop.ts b/examples/cli-client/host/loop.ts index 7eb79f4abe..c5cc28a5c5 100644 --- a/examples/cli-client/host/loop.ts +++ b/examples/cli-client/host/loop.ts @@ -71,6 +71,9 @@ export async function runModelRounds(session: ChatSession): Promise { if (result.text) ui.print(result.text); if (result.toolCalls.length === 0) return; + // cli-client executes tool calls without a confirmation gate because an interactive + // user watches every `→` line and holds Ctrl-C; a host without that live supervision + // must gate execution on user consent (see the guide's security section). 
for (const call of result.toolCalls) { ui.status(`→ ${call.name} ${JSON.stringify(call.arguments)}`); // Long-running calls stay cancellable: Ctrl-C aborts this call (the SDK sends diff --git a/examples/cli-client/test/auth.test.ts b/examples/cli-client/test/auth.test.ts index b0d00f770c..dd5ffafe77 100644 --- a/examples/cli-client/test/auth.test.ts +++ b/examples/cli-client/test/auth.test.ts @@ -5,6 +5,7 @@ import { completeAuthorizationWithBrowser, createOAuthProvider, findCallbackPort, + isSafeBrowserUrl, waitForOAuthCallback } from '../host/auth'; import { ScriptedUI } from '../script/scriptedUi'; @@ -140,3 +141,19 @@ describe('completeAuthorizationWithBrowser', () => { expect(openedUrl).toBe(`http://127.0.0.1:9/authorize?state=${expectedState}`); }); }); + +describe('isSafeBrowserUrl', () => { + it('allows https anywhere and http only on loopback', () => { + expect(isSafeBrowserUrl(new URL('https://example.com/step'))).toBe(true); + expect(isSafeBrowserUrl(new URL('http://127.0.0.1:8080/cb'))).toBe(true); + expect(isSafeBrowserUrl(new URL('http://localhost/cb'))).toBe(true); + expect(isSafeBrowserUrl(new URL('http://[::1]:9000/cb'))).toBe(true); + }); + + it('refuses remote http and non-web schemes', () => { + expect(isSafeBrowserUrl(new URL('http://example.com/phish'))).toBe(false); + expect(isSafeBrowserUrl(new URL('file:///etc/passwd'))).toBe(false); + expect(isSafeBrowserUrl(new URL('javascript:alert(1)'))).toBe(false); + expect(isSafeBrowserUrl(new URL('ftp://example.com/x'))).toBe(false); + }); +}); diff --git a/packages/codemod/src/generated/versions.ts b/packages/codemod/src/generated/versions.ts index 4fa12a1a87..196a367508 100644 --- a/packages/codemod/src/generated/versions.ts +++ b/packages/codemod/src/generated/versions.ts @@ -1,9 +1,9 @@ // AUTO-GENERATED — do not edit. Run `pnpm run generate:versions` to regenerate. 
export const V2_PACKAGE_VERSIONS: Record<string, string> = { - '@modelcontextprotocol/client': '^2.0.0-alpha.2', - '@modelcontextprotocol/server': '^2.0.0-alpha.2', - '@modelcontextprotocol/node': '^2.0.0-alpha.2', - '@modelcontextprotocol/express': '^2.0.0-alpha.2', - '@modelcontextprotocol/server-legacy': '^2.0.0-alpha.2', - '@modelcontextprotocol/core': '^2.0.0-alpha.0' + '@modelcontextprotocol/client': '^2.0.0-alpha.3', + '@modelcontextprotocol/server': '^2.0.0-alpha.3', + '@modelcontextprotocol/node': '^2.0.0-alpha.3', + '@modelcontextprotocol/express': '^2.0.0-alpha.3', + '@modelcontextprotocol/server-legacy': '^2.0.0-alpha.3', + '@modelcontextprotocol/core': '^2.0.0-alpha.1' }; From a323fe75e0e68da6ee9b74c9621eab6545697b89 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Mon, 29 Jun 2026 12:19:19 +0000 Subject: [PATCH 3/5] fix(examples): stop logging OAuth authorization-URL details in cli-client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The browser-authorization flow logged strings derived from the authorization URL (origin in the consent/refusal lines, the full URL in an "opening …" status and in the could-not-open fallback). None of it is a credential — the URL is exactly what the user's browser is about to show — but log lines are the wrong channel for it: - the consent and refusal lines now use static text (the URL adds nothing there), - the "opening …" status no longer echoes the URL, - the could-not-open fallback now presents the URL through the interactive prompt and waits for the user to confirm before polling the callback, instead of printing it and racing ahead. This also clears CodeQL's js/clear-text-logging findings on the UI sinks, which taint-tracked everything read off the OAuth provider into console output.
--- examples/cli-client/host/auth.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/examples/cli-client/host/auth.ts b/examples/cli-client/host/auth.ts index 6bc3fbd6aa..bb7ebb6302 100644 --- a/examples/cli-client/host/auth.ts +++ b/examples/cli-client/host/auth.ts @@ -167,10 +167,10 @@ export async function completeAuthorizationWithBrowser(options: { // The authorization endpoint comes from server-controlled discovery metadata — never hand // a non-https (or non-loopback) URL to the browser, and show the user where they're going. if (!isSafeBrowserUrl(authorizationUrl)) { - ui.status(`skipping "${serverName}" — refusing to open non-https authorization URL (${authorizationUrl.origin})`); + ui.status(`skipping "${serverName}" — refusing to open a non-https authorization URL`); return false; } - ui.attention(`[authorization]\nServer "${serverName}" requires authorization at ${authorizationUrl.origin}.`); + ui.attention(`[authorization]\nServer "${serverName}" requires you to sign in via your browser.`); const approved = await ui.confirm('Open your browser to sign in?'); if (!approved) { ui.status(`skipping "${serverName}" — authorization declined`); @@ -180,11 +180,15 @@ export async function completeAuthorizationWithBrowser(options: { // Attach a handler immediately so a listen failure can't become an unhandled rejection // while the browser-open is still in flight. callback.catch(() => {}); - ui.status(`opening ${authorizationUrl.toString()}`); + ui.status('opening your browser to sign in…'); try { await openUrl(authorizationUrl.toString()); } catch { - ui.print(`Could not open a browser automatically. Please open:\n${authorizationUrl.toString()}`); + // Show the URL through the interactive prompt rather than a log line: the flow now + // waits for the user instead of racing them, and the URL is displayed, not logged. + await ui.ask( + `Could not open a browser automatically. 
Open this URL in your browser, then press Enter\n\n ${authorizationUrl.toString()}\n\nReady?` + ); } let params: URLSearchParams; try { From b2f5bf752b94cba79d2eb13be10c327aeb1d74dd Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Mon, 29 Jun 2026 12:24:46 +0000 Subject: [PATCH 4/5] fix(examples,docs): address review findings in cli-client - Tab completion now offers /watch and the /exit alias (BUILTIN_COMMANDS was missing both, so the documented commands didn't complete). - Declining a sampling request now answers with the spec's application-level code -1 ("User rejected sampling request") instead of the reserved JSON-RPC InvalidRequest (-32600), matching the convention the e2e suite encodes; the guide's hand-written snippet follows. - The /server:prompt dispatch regex (and the completer's prompt-args branch) accept the same server-name shapes mention parsing does, so dotted config keys advertised by /prompts actually dispatch instead of falling through to chat. --- docs/host-integration.md | 3 ++- examples/cli-client/host/host.ts | 5 +++-- examples/cli-client/host/loop.ts | 4 +++- examples/cli-client/host/ui.ts | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/host-integration.md b/docs/host-integration.md index db5c6af231..2bef9b6213 100644 --- a/docs/host-integration.md +++ b/docs/host-integration.md @@ -217,7 +217,8 @@ client.setRequestHandler('sampling/createMessage', async request => { const params = request.params; const approved = await ui.confirm(`Server "${name}" wants to run an LLM request (${params.maxTokens} max tokens): "${preview(params)}". Allow?`); if (!approved) { - throw new ProtocolError(ProtocolErrorCode.InvalidRequest, 'User declined the sampling request'); + // The spec's code for a user-rejected sampling request: application-level -1. 
+ throw new ProtocolError(-1, 'User rejected sampling request'); } const result = await provider.generate({ system: params.systemPrompt, diff --git a/examples/cli-client/host/host.ts b/examples/cli-client/host/host.ts index 5897785be7..15d179b217 100644 --- a/examples/cli-client/host/host.ts +++ b/examples/cli-client/host/host.ts @@ -16,7 +16,6 @@ import { Client, LOG_LEVEL_META_KEY, ProtocolError, - ProtocolErrorCode, SdkError, StreamableHTTPClientTransport, UnauthorizedError @@ -378,7 +377,9 @@ export class McpHost { ); const approved = await this.ui.confirm('Allow?'); if (!approved) { - throw new ProtocolError(ProtocolErrorCode.InvalidRequest, 'User declined the sampling request'); + // The spec's code for a user-rejected sampling request is the application-level -1 — + // not a reserved JSON-RPC code; the request itself was perfectly well-formed. + throw new ProtocolError(-1, 'User rejected sampling request'); } const stopSpinner = this.ui.spinner(); let result: GenerateResult; diff --git a/examples/cli-client/host/loop.ts b/examples/cli-client/host/loop.ts index c5cc28a5c5..27091e2609 100644 --- a/examples/cli-client/host/loop.ts +++ b/examples/cli-client/host/loop.ts @@ -230,7 +230,9 @@ export async function handleUserInput(session: ChatSession, input: string): Prom } // `/server:prompt-name key=value …` — MCP prompts become slash commands. - const promptCommand = trimmed.match(/^\/([A-Za-z0-9_-]+):(\S+)\s*(.*)$/); + // Server names come straight from config keys and may contain dots etc. — accept the + // same shapes mention parsing does, so the commands /prompts advertises actually run. + const promptCommand = trimmed.match(/^\/([^\s:]+):(\S+)\s*(.*)$/); if (promptCommand) { const serverName = promptCommand[1] ?? ''; const promptName = promptCommand[2] ?? 
''; diff --git a/examples/cli-client/host/ui.ts b/examples/cli-client/host/ui.ts index d0f0509484..4f925f9da7 100644 --- a/examples/cli-client/host/ui.ts +++ b/examples/cli-client/host/ui.ts @@ -323,7 +323,7 @@ export async function collectFormInput(ui: HostUI, schema: FormSchema): Promise< } } -const BUILTIN_COMMANDS = ['/help', '/servers', '/tools', '/resources', '/prompts', '/roots', '/root add ', '/quit']; +const BUILTIN_COMMANDS = ['/help', '/servers', '/tools', '/resources', '/prompts', '/roots', '/root add ', '/watch ', '/quit', '/exit']; /** * Tab completion for the interactive CLI: slash commands and prompt names complete from the @@ -349,7 +349,7 @@ export function createCompleter(getHost: () => McpHost | undefined): (line: stri } // `/server:prompt arg=value …` — complete argument names, and argument values via completion/complete. - const promptArgs = /^\/([A-Za-z0-9_-]+):(\S+)\s+(?:.*\s)?([A-Za-z0-9_-]*)(=?)([^\s=]*)$/.exec(line); + const promptArgs = /^\/([^\s:]+):(\S+)\s+(?:.*\s)?([A-Za-z0-9_-]*)(=?)([^\s=]*)$/.exec(line); if (promptArgs) { const [, serverName = '', promptName = '', argumentName = '', equals = '', partial = ''] = promptArgs; const found = host.findPrompt(serverName, promptName); From 4ea873d4aab7cf87b063b4c852de42bccc525556 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Mon, 29 Jun 2026 12:53:04 +0000 Subject: [PATCH 5/5] refactor(examples,docs): fold host design notes into the cli-client README Replace the separate host-integration guide page with a Design notes section in the cli-client README covering the choices a copier should understand first: the example-local provider seam, error results fed back to the model, untrusted-display handling of server text, prompt role preservation, explicit fail-closed approvals, the deliberate absence of a tool-execution gate in an interactive terminal (and what an unattended host must add), and child-process env hygiene. 
The standalone guide needs more rounds of refinement before it earns a docs-tree slot; the example and its README stand on their own meanwhile. --- docs/documents.md | 2 - docs/host-integration.md | 298 --------------------- examples/cli-client/README.md | 16 +- examples/cli-client/host/config.ts | 4 +- examples/cli-client/host/loop.ts | 2 - examples/cli-client/host/naming.ts | 2 +- examples/cli-client/providers/anthropic.ts | 2 +- examples/cli-client/providers/provider.ts | 2 - 8 files changed, 18 insertions(+), 310 deletions(-) delete mode 100644 docs/host-integration.md diff --git a/docs/documents.md b/docs/documents.md index 22721e6cde..65cff9749c 100644 --- a/docs/documents.md +++ b/docs/documents.md @@ -5,7 +5,6 @@ children: - ./server.md - ./client-quickstart.md - ./client.md - - ./host-integration.md - ./faq.md --- @@ -15,5 +14,4 @@ children: - [Server](./server.md) – building MCP servers: transports, tools, resources, prompts, server-initiated requests, and deployment - [Client Quickstart](./client-quickstart.md) – build an LLM-powered chatbot that connects to an MCP server and calls its tools - [Client](./client.md) – building MCP clients: connecting, tools, resources, prompts, server-initiated requests, and error handling -- [Building a Host](./host-integration.md) – turning the client APIs into an application: the tool loop, resources as context, prompts as commands, sampling, elicitation, roots, and auth, walked through `examples/cli-client` - [FAQ](./faq.md) – frequently asked questions and troubleshooting diff --git a/docs/host-integration.md b/docs/host-integration.md deleted file mode 100644 index 2bef9b6213..0000000000 --- a/docs/host-integration.md +++ /dev/null @@ -1,298 +0,0 @@ ---- -title: Building a Host ---- - -# Building a host - -A _host_ is the application that sits between users, a language model, and MCP servers: Claude, an IDE, a chat product, an internal tool with its own UI, a custom agent runtime. 
The SDK gives you the protocol verbs (`listTools`, `callTool`, `readResource`, …); this guide covers the part the protocol deliberately leaves to you — the application behaviors that turn those verbs into something a user can feel: tools the model actually calls, resources that become context, prompts that become commands, sampling and elicitation that round-trip through your UI. - -Everything here is narrated against [`examples/cli-client`](https://github.com/modelcontextprotocol/typescript-sdk/tree/main/examples/cli-client) — a minimal but complete host you can run, read, and copy from. For the protocol-level concepts behind each feature, see the spec site's [client concepts](https://modelcontextprotocol.io/docs/learn/client-concepts) and [architecture](https://modelcontextprotocol.io/docs/learn/architecture) pages; this guide does not restate them. - -## Do you actually need to build a host? - -Most applications should not hand-roll this layer. Pick the first row that matches and stop there: - -| You are… | Do this instead | -| ---------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | -| Building an MCP **server** | Start at the [server quickstart](./server-quickstart.md) — you never write a host. | -| Bringing tools into **an existing host** (Claude, ChatGPT, Cursor, an IDE) | Configure your server in that host; read its docs. Nothing to build. | -| Calling a model API that offers a **hosted MCP connector** | Use the provider's connector and pass it your server's URL — the provider runs the loop for you. | -| Building on an **agent framework** that already speaks MCP (Claude Agent SDK, Vercel AI SDK, Pydantic AI, …) | Use the framework's MCP support; it owns the loop and the feature wiring. 
| -| Building the application that owns the conversation — an IDE, a chat product, an internal tool, your own agent runtime | You are building a host. Keep reading. | - -The narrow audience of this guide is the one that decides whether anything beyond tools ever gets used: hosts are where resources, prompts, sampling, and elicitation either become product features or stay dead protocol surface. - -## The mental model - -A host is a conduit between a model and servers it does not trust: - -1. Discover what each configured server offers (`tools/list`, `resources/list`, `prompts/list`). -2. Hand the model the tool definitions, namespaced per server. -3. Execute the tool calls the model makes — against the server that owns them — and feed the results back, verbatim and labelled. -4. Surface everything that needs a human (sampling approval, elicitation forms, OAuth) through your UI. - -The model never talks to a server directly; your host is the only thing that does. That makes the host responsible for the two judgement calls the protocol cannot make for you: _what the model gets to see_ (context, truncation, provenance) and _what the user gets to approve_ (sampling, destructive actions, credentials). - -cli-client's shape, which this guide walks through: - -```text -examples/cli-client/ (paired with examples/todos-server, the reference server) - cli.ts interactive entry host/host.ts connections, routing, handlers - client.ts scripted CI entry host/loop.ts the conversation loop - providers/ the LLM provider seam host/auth.ts OAuth for protected servers - host/ui.ts terminal UI + elicitation forms -``` - -## The provider seam - -The single most useful structural decision in a host is a thin interface between "the conversation" and "whatever model API you use". In cli-client that seam is `LLMProvider`: - -```ts source="../examples/cli-client/providers/provider.ts#llmProvider" -export interface ToolDefinition { - /** Namespaced tool name as exposed to the model (e.g. 
`mcp__todos__add_task`). */ - name: string; - description?: string; - /** JSON Schema for the tool's arguments, passed through from the MCP `Tool.inputSchema`. */ - inputSchema: Record; -} - -export type ContentPart = { type: 'text'; text: string } | { type: 'image'; mimeType: string; data: string }; - -export interface ToolCall { - /** Provider-assigned id, echoed back on the matching `role: 'tool'` message. */ - id: string; - /** Namespaced tool name (matches a `ToolDefinition.name`). */ - name: string; - arguments: Record; -} - -export type ChatMessage = - | { role: 'user'; content: ContentPart[] } - | { role: 'assistant'; content: ContentPart[]; toolCalls?: ToolCall[] } - | { role: 'tool'; toolCallId: string; toolName: string; content: ContentPart[]; isError?: boolean }; - -export interface GenerateRequest { - system?: string; - messages: ChatMessage[]; - tools?: ToolDefinition[]; - maxTokens?: number; - temperature?: number; -} - -export interface GenerateResult { - /** Assistant prose (may be empty when the model only calls tools). */ - text: string; - /** Tool calls the host must execute and feed back as `role: 'tool'` messages. */ - toolCalls: ToolCall[]; - stopReason: 'end_turn' | 'tool_use' | 'max_tokens' | 'other'; - /** Provider-reported model id (also used to answer MCP sampling requests). */ - model: string; -} - -export interface LLMProvider { - readonly name: string; - generate(request: GenerateRequest): Promise; -} -``` - -Two things make this seam earn its keep: - -- **MCP tool definitions pass through it untouched.** `Tool.inputSchema` is already JSON Schema; every major provider accepts it as-is (`input_schema` for the Anthropic Messages API, `function.parameters` for Chat Completions, `parametersJsonSchema` for Gemini). 
The per-provider files in [`providers/`](https://github.com/modelcontextprotocol/typescript-sdk/tree/main/examples/cli-client/providers) are each a complete, copyable mapping; the differences (where tool results go, how errors are flagged, what ids look like) are exactly the part worth reading once. -- **It serves both directions.** The chat loop calls it to drive the conversation, and the MCP sampling handler calls it to answer servers — one model integration, two consumers. - -Keep the seam in your application. It is deliberately _not_ an SDK package: the SDK stays a protocol library, and your host's message shapes belong to your host. - -## The loop (tools) - -Nothing in MCP runs the conversation for you. The loop every host writes: - -```ts source="../examples/cli-client/host/loop.ts#theLoop" -export async function runModelRounds(session: ChatSession): Promise { - const { host, provider, ui } = session; - // Server instructions and the aggregated tool list are stable within a single user turn. - const system = buildSystemPrompt(host); - const tools = host.toolDefinitions(); - for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { - const stopSpinner = ui.spinner(); - let result: GenerateResult; - try { - result = await provider.generate({ - system, - messages: session.messages, - tools, - maxTokens: session.maxTokens - }); - } finally { - stopSpinner(); - } - session.messages.push({ - role: 'assistant', - content: result.text ? [textPart(result.text)] : [], - ...(result.toolCalls.length > 0 ? 
{ toolCalls: result.toolCalls } : {}) - }); - if (result.model !== session.announcedModel) { - session.announcedModel = result.model; - ui.status(`model: ${result.model}`); - } - if (result.text) ui.print(result.text); - if (result.toolCalls.length === 0) return; - - // cli-client executes tool calls without a confirmation gate because an interactive - // user watches every `→` line and holds Ctrl-C; a host without that live supervision - // must gate execution on user consent (see the guide's security section). - for (const call of result.toolCalls) { - ui.status(`→ ${call.name} ${JSON.stringify(call.arguments)}`); - // Long-running calls stay cancellable: Ctrl-C aborts this call (the SDK sends - // notifications/cancelled) and the failure goes back to the model like any other. - const cancellation = new AbortController(); - ui.setCancelHandler(() => { - ui.status(`cancelling ${call.name}…`, 'cancel'); - cancellation.abort(); - }); - let parts: ContentPart[]; - let isError: boolean; - try { - ({ parts, isError } = await host.executeToolCall(call, { signal: cancellation.signal })); - } finally { - ui.setCancelHandler(undefined); - } - const summary = partsToDisplayText(parts); - ui.status(`${isError ? '✗' : '✓'} ${call.name}: ${summary.length > 200 ? `${summary.slice(0, 200)}…` : summary}`); - session.messages.push({ role: 'tool', toolCallId: call.id, toolName: call.name, content: parts, isError }); - } - } - ui.print('(stopped: tool-call round limit reached)'); -} -``` - -The details that separate a working loop from a frustrating one: - -- **Namespace per server.** cli-client exposes every tool as `mcp__<server>__<tool>` (`host/naming.ts`), so two servers can both ship `search` and a model-issued call always routes back to the server that owns it. Sanitize server names first — provider APIs restrict tool-name characters. 
-- **Handle every tool call in the round, then loop.** Models issue parallel calls; execute them all and send all the results back before asking for the next turn (the Anthropic mapping additionally requires the results to share one user message — see `providers/anthropic.ts`). -- **`isError` is a result, not an exception.** Mark it as an error in the provider's format and let the model read the message — it is allowed to try something else. A _thrown_ error from `callTool` (unknown tool, timeout, lost connection) is different in kind, but the model should see that as a failed call too. -- **Narrow content blocks; never assume text.** Tool results can carry text, images, audio, resource links, and embedded resources (`host/content.ts` shows the full narrowing). Pass images through if your provider mapping supports them; reduce the rest to labelled placeholders. -- **Truncation is your job.** Neither the SDK nor the protocol caps what a server returns. cli-client caps everything it injects at 50k characters; pick a budget and state it. -- **Bound the loop.** A model that keeps calling tools forever is a bug; cap the rounds and say so when you hit the cap. -- **Fold `getInstructions()` into the system prompt** — server instructions exist precisely so the host can pass them to the model. - -> **What real hosts do** — Claude Code uses the same `mcp__<server>__<tool>` namespacing and feeds `isError` results back to the model as errors. Almost nobody in the wider ecosystem shows the loop itself: most SDK examples stop at "list tools and print them", and frameworks bury the loop inside middleware. It is one screen of code; write it once, visibly. - -## Resources become context - -Resources are **application-driven**: the protocol gives you list/read and deliberately does not say when to read. Three patterns cover real hosts: - -1. 
**User-driven (what cli-client implements).** The user names a resource — `@todos:todos://board what should I tackle first?` — the host calls `readResource`, and injects the contents as a context block _with provenance_: - - ```text - - …contents… - - The user attached this MCP resource as context. Use it to answer; do not re-read it unless told it changed. - ``` - - Label where the content came from, cap its size, replace binary contents with a placeholder, and tell the model not to re-fetch. Use `listResources()` (and resource templates plus `complete()`) to power the picker UX, and the client's `listChanged` option to keep the cached list fresh. To watch a specific resource, subscribe to it — `resources/subscribe` on 2025-era connections, `client.listen({ resourceSubscriptions: [uri] })` on 2026-07-28 — and react to `notifications/resources/updated`; cli-client exposes this as `/watch @server:uri`. - -2. **Auto-attach policies.** Some hosts attach certain resources to every conversation (an "active document", a project manifest) based on their own rules. Same mechanics as above — the policy is the only new part. - -3. **Model-driven (resources as tools).** If you want the _model_ to decide what to read, expose two synthetic tools — `list_resources(server?)` and `read_resource(server, uri)` — that call `listResources`/`readResource` under the hood. Register them only when at least one connected server actually declares the `resources` capability, apply the same size cap, and treat "not found" as a soft error that tells the model to re-list. This is the pattern to reach for when users won't know URIs but the task needs server data. 
- -> **What real hosts do** — Claude Code implements the user-driven path (`@server:uri`) _and_ the model-driven fallback (`ListMcpResources` / `ReadMcpResource` tools, registered only when a server declares resources, with a 100k-character cap), does not implement `resources/subscribe`, and relies on `list_changed` to invalidate its cached list. - -## Prompts become commands - -Prompts are user-invoked workflows. The host's job is small and concrete: - -- Surface each prompt as a command — cli-client uses `/server:prompt-name key=value …` — listing `prompt.arguments` so the user knows what to supply, and prompting for missing required arguments (`complete()` can power autocompletion for argument values). -- Call `getPrompt` and **append the returned messages to the conversation as separate turns, keeping their roles**. A prompt's value is often exactly that it seeds a multi-turn shape (context as a user turn, a primed assistant turn, then the ask); flattening it into one block of text throws that away. -- Then run the loop — the seeded conversation usually ends with something for the model to do. - -> **What real hosts do** — Claude Code exposes every server prompt as a slash command, but flattens the returned messages into a single hidden user message, discarding the roles. Keep the roles; it costs nothing and is what the shape is for. - -## Sampling: the server borrows your model - -`sampling/createMessage` is a server asking the _host's_ model to run a completion — so servers can ship LLM-powered features without shipping API keys. The host decides whether and how: - -```ts -client.setRequestHandler('sampling/createMessage', async request => { - const params = request.params; - const approved = await ui.confirm(`Server "${name}" wants to run an LLM request (${params.maxTokens} max tokens): "${preview(params)}". Allow?`); - if (!approved) { - // The spec's code for a user-rejected sampling request: application-level -1. 
- throw new ProtocolError(-1, 'User rejected sampling request'); - } - const result = await provider.generate({ - system: params.systemPrompt, - messages: params.messages.map(toChatMessage), - maxTokens: Math.min(params.maxTokens, SAMPLING_MAX_TOKENS_CAP) - }); - return { role: 'assistant', content: { type: 'text', text: result.text }, model: result.model, stopReason: 'endTurn' }; -}); -``` - -The three host responsibilities, in order of importance: - -1. **Gate it on the user.** A sampling request spends the user's tokens and can carry data to a third-party API. Show what the server asked and require an explicit yes; treat "no answer" as no. Cap `maxTokens` regardless of what was requested. -2. **Route it through the same provider as the chat.** That is the entire point — one model integration serves both the conversation and the servers (todos-server's `prioritize` and `brainstorm_tasks` tools both work this way through cli-client). -3. **Decline by omission, not by error.** If your host will not support sampling, simply do not declare the `sampling` capability — servers can check for it and fall back. Do not declare it and then reject every request. - -Declare the capability in the client constructor and register the handler once; the SDK carries the request over both protocol revisions (as a server→client request on 2025-era connections, and via `input_required` results on 2026-07-28 connections) without any era-specific code in your handler. Note that 2025-era push-style sampling needs a sessionful server when running over Streamable HTTP, and that as of the 2026-07-28 revision sampling is in a deprecation window (see the spec's versioning notes) — supported, but check the spec status before making it load-bearing. - -> **What real hosts do** — Claude Code does not declare the sampling capability at all (it has its own model loop and declines by omission). 
The C# SDK and FastMCP both ship "sampling handler backed by your chat client" helpers — evidence that when a host does say yes, wiring it to the existing provider is the established shape. - -## Elicitation: the server asks your user - -Elicitation is the inverse of sampling: the server needs _the human_, not the model. Two modes arrive at the same handler: - -- **Form mode** carries `message` plus a flat `requestedSchema` (strings, numbers, booleans, enums — no nesting). Generate UI from it: cli-client walks the properties and asks one question per field in the terminal (`host/ui.ts`), validating against the declared type before accepting. -- **URL mode** carries a URL the user must visit (payment, OAuth-style consent, anything that should not pass through the host). Apply the same https-or-loopback gate as OAuth before offering it, then show it, let the user open it, confirm when done. - -Return exactly one of the three outcomes and mean it: `accept` (with the collected content), `decline` (the user said no), `cancel` (the user dismissed it). Decline and cancel are answers, not retries — a server that re-asks on decline is a bug, and a host that maps errors to `accept` is a worse one. cli-client fails closed: any error in form collection becomes `cancel`. - -## Roots - -Roots tell servers which directories the conversation is about. Derive them from something real — the workspace folders, a `--root` flag, the cwd — declare the `roots` capability, answer `roots/list`, and send `roots/list_changed` when the set changes (on 2025-era connections; 2026-07-28 servers re-request roots when they need them). cli-client keeps this to a dozen lines in `host/host.ts` plus a `/root add` command; it is the cheapest feature in the protocol to support properly. Like sampling and logging, roots is in the 2026-07-28 deprecation window (SEP-2577) — supported throughout the window, with paths passed as tool parameters or configuration as the long-term direction. 
- -## Logging and progress - -- **Progress** is the live channel: pass `onprogress` on long-running `callTool` calls and render it (a status line is enough). It also gives you per-call attribution when the model runs tools in parallel. (todos-server demonstrates it with `work_through_tasks` — say "do all my tasks" and watch the status line.) -- **Cancellation** is the other half of long-running calls: pass an `AbortSignal` in the call's `RequestOptions` and abort it to cancel — the SDK sends `notifications/cancelled`, the local call rejects, and a well-behaved server checks `ctx.mcpReq.signal` and stops working. cli-client wires Ctrl-C during a tool call to exactly this; try it mid-way through "do all my tasks". -- **Logging**: render `notifications/message` as it arrives, tagged with the server name. On 2025-era connections call `setLoggingLevel(...)` once per server to opt in; on 2026-07-28 connections log delivery is opted into per request via the `io.modelcontextprotocol/logLevel` `_meta` key (and MCP-level logging is in a deprecation window). Whatever the era: a stdio server's `stderr` is also worth surfacing — that is where well-behaved servers put their own diagnostics. - -## Connecting, configuration, and auth - -Hosts conventionally read an `mcpServers`-shaped config (cli-client's `host/config.ts`): - -- `{ command, args, env?, cwd? }` entries are spawned as child processes speaking stdio. Pass the child a minimal environment plus exactly what the entry lists — never your host's full environment; your API keys live there. -- `{ url, headers? }` entries connect over Streamable HTTP. Support `${VAR}` interpolation so tokens stay in the environment, not the file. -- Adding a server to the config is an act of trust: it sees whatever the model sends it and its results go straight into the model's context. Say that in your own docs. - -For protected HTTP servers, two tiers cover almost everything: - -1. 
**Static credentials** — a bearer token or API key in `headers`. One line of config, no flow. -2. **OAuth** — when a server answers 401, the SDK drives discovery, dynamic client registration, PKCE, and token exchange through an `OAuthClientProvider` you supply ([client guide → Authentication](./client.md#authentication)). The host's share of the work (`host/auth.ts`): ask the user before opening a browser, run a loopback callback server, **verify the `state` parameter yourself** (the SDK does not), call `finishAuth()` on the transport that got the 401, then reconnect on a fresh transport with the same provider. Keep tokens in memory or in the platform keychain. - -## Going further - -Patterns worth knowing about once the basics work — none of them are in cli-client's code, deliberately: - -- **Progressive discovery.** Hosts with many servers should not dump every tool into every request: filter by the conversation (per-server enable/disable, model-visible tool search, or a cheap relevance pass), lean on `server.getInstructions()` to tell the model what a server is for, and use the client's `listChanged` tracking to refresh lazily instead of re-listing on every turn. -- **Programmatic tool calling.** Nothing requires a model in the loop: `callTool` is just an API call, so hosts can run MCP tools from code — scheduled jobs, slash commands that hit a tool directly, or letting the model write code that calls tools through an execution environment instead of one round trip per call. The same namespacing and result-handling rules apply; only the caller changes. -- **Automatic resource loading.** The model-driven fallback from the resources section — list/read exposed as tools — is the simplest way to let the model pull in server data it was not handed up front. - -## Security responsibilities of a host - -A host sits between untrusted servers, a user's credentials, and a model that does what its context tells it. 
The short list cli-client implements and the guide above assumes: - -- Treat every server-provided string as untrusted input: strip terminal escape sequences before rendering, label injected content with its origin, and cap its size. -- Decide a tool-consent policy: the spec expects a human in the loop able to deny tool invocations, so confirm destructive or side-effecting calls (or maintain a per-server allowlist). cli-client auto-executes because an interactive user watches every call and can Ctrl-C; an unattended host must not. -- Gate sampling on explicit user approval and cap its token spend; gate browser-opening (OAuth, URL elicitation) the same way. -- Never hand a child server process your full environment, and keep API keys out of config files (`${VAR}` interpolation exists for this). -- Validate the OAuth `state` parameter, only hand `https:` (or loopback) authorization URLs to the browser, and never render attacker-controllable error descriptions from callbacks. -- Treat tool annotations (`readOnlyHint`, `destructiveHint`) as hints for UX, never as a security boundary. - -## See also - -- [`examples/cli-client`](https://github.com/modelcontextprotocol/typescript-sdk/tree/main/examples/cli-client) — the example this guide walks through; its README lists a scripted tour. -- [Client guide](./client.md) — the per-API reference for everything used here (connecting, auth, tools, resources, prompts, handlers, errors). -- [Client quickstart](./client-quickstart.md) — the smallest possible LLM-connected client (tools only, one server); cli-client is what it grows into. -- [Spec: client concepts](https://modelcontextprotocol.io/docs/learn/client-concepts) — the protocol-level view of the features wired here. 
diff --git a/examples/cli-client/README.md b/examples/cli-client/README.md index 2280b01c2a..c9eca01ccb 100644 --- a/examples/cli-client/README.md +++ b/examples/cli-client/README.md @@ -1,6 +1,6 @@ # cli-client — the reference MCP host -An interactive, LLM-connected chat CLI with **no built-in tools**: everything the model can do comes from the MCP servers you connect it to. It is a minimal but complete host — every client-side MCP feature is wired the way a host application should wire it — and it is the example the [host-integration guide](../../docs/host-integration.md) walks through, file by file. +An interactive, LLM-connected chat CLI with **no built-in tools**: everything the model can do comes from the MCP servers you connect it to. It is a minimal but complete host — every client-side MCP feature is wired the way a host application should wire it — built to be read and copied from. Its standard workload is [`examples/todos-server`](../todos-server/README.md), the reference server, but it connects to **any** MCP server: a URL, a command line, or an `mcpServers`-style config file. @@ -140,6 +140,18 @@ script/ the scripted conversation CI replays test/ unit tests ``` -Unlike the single-feature stories, the SDK `Client`/transport construction here lives in `host/host.ts` rather than inline in the entry files — the host wiring is what this example documents, and the [host-integration guide](../../docs/host-integration.md) walks through it file by file. +Unlike the single-feature stories, the SDK `Client`/transport construction here lives in `host/host.ts` rather than inline in the entry files — the host wiring is what this example documents. + +## Design notes + +Choices in here that are worth understanding before copying: + +- **The provider seam is deliberately example-local.** The SDK stays a protocol library; a host's message shapes belong to the host. 
The seam earns its keep twice: MCP `Tool.inputSchema` is already JSON Schema and passes to each vendor API untouched, and the same `generate()` answers both the chat loop and servers' sampling requests — one model integration, two consumers. +- **Tool results go back to the model verbatim, including failures.** An `isError` result is fed back as a tool message rather than thrown, so the model can read the error and try something else. A round cap bounds a model that keeps calling tools forever. +- **Server-controlled text is untrusted display input.** ANSI/control escapes are stripped on every render path; attached resources are size-capped and wrapped in provenance labels so the model knows what it is reading and where it came from, and is told not to re-fetch it. +- **Prompts keep their roles.** `prompts/get` messages seed the conversation as separate user/assistant turns instead of being flattened into one block — that is what the shape is for. +- **Approvals are explicit and fail closed.** Sampling shows the full request (not a preview) and caps `maxTokens` regardless of what the server asked. Browser-opening — OAuth and URL-mode elicitation alike — requires `https:` (or loopback) and user consent. The OAuth callback's `state` is verified by the host, and a missing or mismatched value aborts the flow. +- **Tool execution is not gated here** because an interactive user watches every call and holds Ctrl-C. An unattended host must add a consent policy — confirm destructive or side-effecting calls, or keep a per-server allowlist — and should treat tool annotations (`readOnlyHint`, `destructiveHint`) as UX hints, never as a security boundary. +- **Spawned servers get a minimal environment**: the config entry's `env` plus defaults, never the host's full environment, so provider API keys cannot leak into child processes. 
Not goals of this example: it is not an agent framework (no plugins, sub-agents, or planning), there is no streaming output, no conversation persistence, and the providers make exactly one `generate()` call per turn. diff --git a/examples/cli-client/host/config.ts b/examples/cli-client/host/config.ts index 05c0f15289..15f358a003 100644 --- a/examples/cli-client/host/config.ts +++ b/examples/cli-client/host/config.ts @@ -4,8 +4,8 @@ import { siblingPath } from '@mcp-examples/shared'; import * as z from 'zod/v4'; /** - * The standard `mcpServers` config shape (the same one Claude Desktop, Claude Code, and most - * other hosts read): stdio servers are spawned from `command`/`args`, remote servers are + * The standard `mcpServers` config shape most MCP hosts read: stdio servers are spawned + * from `command`/`args`, remote servers are * reached via `url`. Anything you list here is code/infrastructure you trust — adding a * server means trusting it with whatever the model sends it. */ diff --git a/examples/cli-client/host/loop.ts b/examples/cli-client/host/loop.ts index 27091e2609..483eb0c3ce 100644 --- a/examples/cli-client/host/loop.ts +++ b/examples/cli-client/host/loop.ts @@ -40,7 +40,6 @@ export function buildSystemPrompt(host: McpHost): string { * messages so each provider can encode them natively, and `isError` results still go to the * model — it is allowed to read the error and try something else. */ -//#region theLoop export async function runModelRounds(session: ChatSession): Promise { const { host, provider, ui } = session; // Server instructions and the aggregated tool list are stable within a single user turn. @@ -97,7 +96,6 @@ export async function runModelRounds(session: ChatSession): Promise { } ui.print('(stopped: tool-call round limit reached)'); } -//#endregion theLoop /** Send one user turn (with optional attached-resource context blocks) through the loop. 
*/ export async function runConversationTurn(session: ChatSession, userText: string, attachments: string[] = []): Promise { diff --git a/examples/cli-client/host/naming.ts b/examples/cli-client/host/naming.ts index faa78aacff..bc23fc3a55 100644 --- a/examples/cli-client/host/naming.ts +++ b/examples/cli-client/host/naming.ts @@ -1,6 +1,6 @@ /** * Per-server tool names are namespaced `mcp__<server>__<tool>` before they reach the model - (the same scheme Claude Code uses), so two servers can both expose `search` and the host + (a common host convention), so two servers can both expose `search` and the host * can always route a model-issued call back to the server that owns it. */ export function sanitizeServerName(name: string): string { diff --git a/examples/cli-client/providers/anthropic.ts b/examples/cli-client/providers/anthropic.ts index 701fa03f03..9f436f0418 100644 --- a/examples/cli-client/providers/anthropic.ts +++ b/examples/cli-client/providers/anthropic.ts @@ -26,7 +26,7 @@ function toContentBlocks(message: ChatMessage): Anthropic.ContentBlockParam[] { /** * Convert the provider-neutral request into Anthropic Messages API parameters. * - * The mapping every host writes for Claude: + * The mapping every host writes for the Anthropic Messages API: * - MCP tool definitions pass straight through — `inputSchema` is already JSON Schema. * - Assistant tool calls become `tool_use` blocks; tool results become `tool_result` blocks * inside a *user* message, and results for parallel tool calls must share one user message. diff --git a/examples/cli-client/providers/provider.ts b/examples/cli-client/providers/provider.ts index 5877c248b0..16cb51e1dc 100644 --- a/examples/cli-client/providers/provider.ts +++ b/examples/cli-client/providers/provider.ts @@ -11,7 +11,6 @@ * requests from servers. */ -//#region llmProvider export interface ToolDefinition { /** Namespaced tool name as exposed to the model (e.g. `mcp__todos__add_task`). 
*/ name: string; @@ -57,7 +56,6 @@ export interface LLMProvider { readonly name: string; generate(request: GenerateRequest): Promise<GenerateResult>; } -//#endregion llmProvider export function textPart(text: string): ContentPart { return { type: 'text', text };