From 1987147935325ea6b1e0f96cea6851c9d407e6c2 Mon Sep 17 00:00:00 2001 From: Packy Gallagher Date: Tue, 19 May 2026 08:36:35 -0700 Subject: [PATCH 1/2] fix(runner): skip tool calls SessionToolRunner does not own `SessionToolRunner` now skips tool calls whose names it was not registered with: it posts no result and leaves the `tool_use_id` pending, rather than answering them with an error. A session can be served by more than one client at once, and each client should only fulfill the tool calls it owns. Answering for a tool another client owns claims a `tool_use_id` that isn't this runner's to fulfill and corrupts the session, so unrecognized names are now left for whoever owns them. --- examples/managed-agents-observe-tool-calls.ts | 10 +- src/lib/tools/SessionToolRunner.ts | 85 ++++++++------ tests/lib/tools/SessionToolRunner.test.ts | 109 ++++++++++++++++-- 3 files changed, 157 insertions(+), 47 deletions(-) diff --git a/examples/managed-agents-observe-tool-calls.ts b/examples/managed-agents-observe-tool-calls.ts index c74fa0ab..fb88b840 100644 --- a/examples/managed-agents-observe-tool-calls.ts +++ b/examples/managed-agents-observe-tool-calls.ts @@ -230,8 +230,14 @@ async function heartbeatLease( function printCall(call: DispatchedToolCall): void { const input = truncate(JSON.stringify(call.event.input)); const status = call.isError ? 'error' : 'ok'; - const posted = call.posted ? '' : ' [result post failed]'; - console.log(`tool ${call.name}(${input}) -> ${status}${posted}`); + // A skipped unowned tool call has no result event and was deliberately + // left pending for its owner (the other client servicing this split + // session) — that is not a failed post. + const note = + call.posted ? '' + : call.result === undefined ? ' [skipped — not owned by this runner]' + : ' [result post failed]'; + console.log(`tool ${call.name}(${input}) -> ${status}${note}`); } function requireEnv(name: string): string { diff --git a/src/lib/tools/SessionToolRunner.ts b/src/lib/tools/SessionToolRunner.ts index b233c748..a49b350f 100644 --- a/src/lib/tools/SessionToolRunner.ts +++ b/src/lib/tools/SessionToolRunner.ts @@ -107,8 +107,12 @@ export interface DispatchedToolCall { * `user.tool_result` for an `agent.tool_use`, a `user.custom_tool_result` for * an `agent.custom_tool_use`. Read `result.content` for the tool's output * blocks and `result.is_error` for the error flag. + * + * `undefined` when no result event was ever built — i.e. the tool name is + * not one this runner owns and, under the split-client behavior, it + * deliberately posted nothing and left the id pending for its owner. */ - readonly result: DispatchedToolResultParams; + readonly result?: DispatchedToolResultParams; /** * Flat convenience for `event.id` — the id of the tool-use event this result * answers (echoed back as `tool_use_id` / `custom_tool_use_id` on the result). @@ -117,14 +121,16 @@ export interface DispatchedToolCall { /** Flat convenience for `event.name` — the dispatched tool's name. */ readonly name: string; /** - * Flat convenience for `result.is_error` — `true` when the tool threw or was - * not found, `false` on success. + * Flat convenience for `result.is_error` — `true` when the tool threw, + * `false` on success and for a skipped unowned call. */ readonly isError: boolean; /** - * Whether the `user.tool_result` was successfully posted back to the session. - * `false` when the post itself failed — typically a permanent 4xx or - * send-retry exhaustion. + * Whether a result event for this call reached the session. `false` when the + * post itself failed (typically a permanent 4xx or send-retry exhaustion) + * and also `false` — with no `result` event ever built — for a tool name + * this runner does not own when it deliberately posts nothing and leaves the + * id pending for its owner (the split-client behavior). */ readonly posted: boolean; } @@ -451,35 +457,48 @@ export class SessionToolRunner implements AsyncIterable { this.#inFlightCount++; try { const tool = this.#toolByName.get(ev.name); + if (!tool) { + // Skip (split-client partial fulfilment): a name this runner + // is not registered for belongs to the other client servicing this + // session (typically the customer's app backend handling custom tools). + // Post NO result, do not mark it answered, and leave the tool_use_id + // pending for its owner — claiming it would corrupt the conversation. + // Still yield the call so the consumer can observe the unowned + // dispatch; nothing was sent, so `posted`/`isError` stay false and no + // `result` event is populated. The id stays unanswered, so reconcile + // keeps it out of the idle/end-turn accounting and re-surfaces it after + // a reconnect until its owner answers it. + this.#logger.info('tool not owned by this runner; leaving the tool_use_id pending for its owner', { + component: 'session-tool-runner', + session_id: this.sessionId, + tool: ev.name, + tool_use_id: ev.id, + }); + this.#results.push({ event: ev, toolUseId: ev.id, name: ev.name, isError: false, posted: false }); + return; + } let content: string | Array; let isError: boolean; - if (!tool) { - // Match `BetaToolRunner`'s wording — the string lands in the model's - // context, so the two `toolRunner()` surfaces should agree. - content = `Error: Tool '${ev.name}' not found`; - isError = true; - } else { - // Per-tool controller: aborts on the runner's own signal *or* the - // per-tool timeout, so an in-flight tool stops promptly when the runner - // is aborted instead of running until the timeout. - const toolCtrl = new AbortController(); - const detachTool = linkAbort(this.#controller.signal, toolCtrl); - const timer = setTimeout(() => toolCtrl.abort(), TOOL_TIMEOUT_MS); - try { - // Pass the source `agent.tool_use` / `agent.custom_tool_use` event - // straight through as the run context's `toolUse` — it is a union - // member of `BetaToolUse`, no Messages-block adapter needed. - const outcome = await runRunnableTool(tool, ev.input, { - toolUse: ev, - toolUseBlock: ev, - signal: toolCtrl.signal, - }); - content = outcome.content; - isError = outcome.isError; - } finally { - clearTimeout(timer); - detachTool(); - } + // Per-tool controller: aborts on the runner's own signal *or* the + // per-tool timeout, so an in-flight tool stops promptly when the runner + // is aborted instead of running until the timeout. + const toolCtrl = new AbortController(); + const detachTool = linkAbort(this.#controller.signal, toolCtrl); + const timer = setTimeout(() => toolCtrl.abort(), TOOL_TIMEOUT_MS); + try { + // Pass the source `agent.tool_use` / `agent.custom_tool_use` event + // straight through as the run context's `toolUse` — it is a union + // member of `BetaToolUse`, no Messages-block adapter needed. + const outcome = await runRunnableTool(tool, ev.input, { + toolUse: ev, + toolUseBlock: ev, + signal: toolCtrl.signal, + }); + content = outcome.content; + isError = outcome.isError; + } finally { + clearTimeout(timer); + detachTool(); } // Answer with the result event that matches the call kind: a // `user.tool_result` for an `agent.tool_use`, a `user.custom_tool_result` diff --git a/tests/lib/tools/SessionToolRunner.test.ts b/tests/lib/tools/SessionToolRunner.test.ts index edacff20..87fa58fe 100644 --- a/tests/lib/tools/SessionToolRunner.test.ts +++ b/tests/lib/tools/SessionToolRunner.test.ts @@ -157,7 +157,7 @@ describe('SessionToolRunner', () => { expect(call.event.input).toEqual({ tz: 'UTC' }); expect(call.isError).toBe(false); expect(call.posted).toBe(true); - expect(call.result.content).toEqual([{ type: 'text', text: 'noon' }]); + expect(call.result!.content).toEqual([{ type: 'text', text: 'noon' }]); // The send carried a user.tool_result with matching tool_use_id. const sentResults = calls.send.flat().filter((e) => e.type === 'user.tool_result'); @@ -177,18 +177,103 @@ describe('SessionToolRunner', () => { expect(calls).toHaveLength(1); expect(calls[0]!.isError).toBe(true); - expect(JSON.stringify(calls[0]!.result.content)).toMatch(/kaboom/); + expect(JSON.stringify(calls[0]!.result!.content)).toMatch(/kaboom/); }); - test('yields isError=true for an unknown tool name', async () => { - const { client } = makeFake({ streams: [[toolUse('tu_u', 'no_such_tool'), TERMINATED]] }); - const runner = new SessionToolRunner('s', { client, tools: [], maxIdleMs: 0 }); + // Default (skip-by-default): a self-hosted session is serviced by two + // clients — this runner (sandbox tools) and the customer's app backend + // (custom tools). A tool-use whose name this runner is not registered for + // belongs to the other client: the runner must post NO result, claim + // nothing, and leave the id pending — while still yielding the dispatched + // call so the consumer can observe it. A registered tool still runs. + test('skips a tool name it does not own (builtin and custom) and still runs a registered tool', async () => { + let echoRuns = 0; + const echo = makeOkTool('echo', async () => { + echoRuns++; + return 'ran'; + }); + const { client, calls } = makeFake({ + streams: [ + [ + toolUse('tu_x', 'not_ours'), + customToolUse('ctu_y', 'app_backend_tool'), + toolUse('tu_ok', 'echo'), + TERMINATED, + ], + ], + }); + const runner = new SessionToolRunner('s', { client, tools: [echo], maxIdleMs: 0 }); const out: DispatchedToolCall[] = []; + // Must not throw on the registry miss. for await (const c of runner) out.push(c); + expect(out).toHaveLength(3); + + const unownedBuiltin = out[0]!; + expect(unownedBuiltin.name).toBe('not_ours'); + expect(unownedBuiltin.event.type).toBe('agent.tool_use'); + expect(unownedBuiltin.isError).toBe(false); + expect(unownedBuiltin.posted).toBe(false); + expect(unownedBuiltin.result).toBeUndefined(); + + const unownedCustom = out[1]!; + expect(unownedCustom.name).toBe('app_backend_tool'); + expect(unownedCustom.event.type).toBe('agent.custom_tool_use'); + expect(unownedCustom.isError).toBe(false); + expect(unownedCustom.posted).toBe(false); + expect(unownedCustom.result).toBeUndefined(); + + const owned = out[2]!; + expect(owned.name).toBe('echo'); + expect(owned.isError).toBe(false); + expect(owned.posted).toBe(true); + expect(owned.result!.content).toEqual([{ type: 'text', text: 'ran' }]); + expect(echoRuns).toBe(1); + + // Nothing was posted for either unowned id — only the registered tool's + // result reached the session. + const sent = calls.send.flat(); + expect(sent).toHaveLength(1); + expect(sent[0]!.type).toBe('user.tool_result'); + expect(sent[0]!['tool_use_id']).toBe('tu_ok'); + expect(JSON.stringify(sent)).not.toContain('tu_x'); + expect(JSON.stringify(sent)).not.toContain('ctu_y'); + }); + + // A skipped (unanswered) unowned tool_use must stay OUT of the end-turn + // accounting: reconcile sees history ending on an end_turn idle but with the + // unowned tool_use still unanswered, so it must NOT arm the idle countdown — + // the runner has not handled that call, its owner still has to. + test('a skipped unowned tool_use does not falsely trip the idle watchdog', async () => { + const { client, calls } = makeFake({ + streams: [[]], // no live events; reconcile drives the test + list: [[toolUse('evt_pending', 'not_ours'), idleEndTurn()]], + }); + const runner = new SessionToolRunner('s', { client, tools: [], maxIdleMs: 50 }); + const out: DispatchedToolCall[] = []; + let finished = false; + const consumer = (async () => { + for await (const c of runner) out.push(c); + finished = true; + })(); + + // Wait well past maxIdleMs (50). A wrongly-armed idle countdown would have + // aborted the runner ~50ms in and resolved the consumer; a correct runner + // keeps running because the unowned id is still pending its owner. + await new Promise((r) => setTimeout(r, 300)); + expect(finished).toBe(false); + expect(out).toHaveLength(1); - expect(out[0]!.isError).toBe(true); - expect(JSON.stringify(out[0]!.result.content)).toMatch(/not found/); + const call = out[0]!; + expect(call.toolUseId).toBe('evt_pending'); + expect(call.posted).toBe(false); + expect(call.isError).toBe(false); + expect(call.result).toBeUndefined(); + expect(calls.send).toHaveLength(0); + + runner.abort(); + await consumer; + expect(finished).toBe(true); }); test('does not re-execute a tool whose result is already in history', async () => { @@ -356,9 +441,9 @@ describe('SessionToolRunner', () => { expect(call.event.input).toEqual({ order_id: 42 }); expect(call.isError).toBe(false); expect(call.posted).toBe(true); - expect(call.result.type).toBe('user.custom_tool_result'); - expect((call.result as { custom_tool_use_id?: string }).custom_tool_use_id).toBe('ctu_1'); - expect(call.result.content).toEqual([{ type: 'text', text: 'shipped' }]); + expect(call.result!.type).toBe('user.custom_tool_result'); + expect((call.result! as { custom_tool_use_id?: string }).custom_tool_use_id).toBe('ctu_1'); + expect(call.result!.content).toEqual([{ type: 'text', text: 'shipped' }]); // A custom tool call must be answered with user.custom_tool_result, never // user.tool_result — the wrong type leaves the session hung. @@ -437,7 +522,7 @@ describe('SessionToolRunner', () => { expect(out).toHaveLength(1); // Mapped to the Sessions search_result block shape, NOT a text block with // a JSON.stringify of the original block. - expect(out[0]!.result.content).toEqual([ + expect(out[0]!.result!.content).toEqual([ { type: 'search_result', source: 'https://example.com/doc', @@ -468,7 +553,7 @@ describe('SessionToolRunner', () => { const out: DispatchedToolCall[] = []; for await (const c of runner) out.push(c); - expect(out[0]!.result.content).toEqual([ + expect(out[0]!.result!.content).toEqual([ { type: 'search_result', source: 'https://example.com', From ac9ece3c566b4488dcf73849c17b656ec2d7d17d Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 19 May 2026 15:37:26 +0000 Subject: [PATCH 2/2] chore: release main --- .release-please-manifest.json | 2 +- CHANGELOG.md | 8 ++++++++ package.json | 2 +- src/version.ts | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index ff827d8d..42db1134 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,5 +1,5 @@ { - ".": "0.97.0", + ".": "0.97.1", "packages/vertex-sdk": "0.16.1", "packages/bedrock-sdk": "0.29.2", "packages/foundry-sdk": "0.2.3", diff --git a/CHANGELOG.md b/CHANGELOG.md index 9508a464..763b97a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 0.97.1 (2026-05-19) + +Full Changelog: [sdk-v0.97.0...sdk-v0.97.1](https://github.com/anthropics/anthropic-sdk-typescript/compare/sdk-v0.97.0...sdk-v0.97.1) + +### Bug Fixes + +* **runner:** skip tool calls SessionToolRunner does not own ([9987379](https://github.com/anthropics/anthropic-sdk-typescript/commit/9987379abeaf28b17edb5518ac229d2a6caa4bf6)) + ## 0.97.0 (2026-05-19) Full Changelog: [sdk-v0.96.0...sdk-v0.97.0](https://github.com/anthropics/anthropic-sdk-typescript/compare/sdk-v0.96.0...sdk-v0.97.0) diff --git a/package.json b/package.json index a0a17fe3..03d4f156 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@anthropic-ai/sdk", - "version": "0.97.0", + "version": "0.97.1", "description": "The official TypeScript library for the Anthropic API", "author": "Anthropic ", "types": "dist/index.d.ts", diff --git a/src/version.ts b/src/version.ts index 1b7c9366..9460bf4f 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = '0.97.0'; // x-release-please-version +export const VERSION = '0.97.1'; // x-release-please-version