Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .release-please-manifest.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
".": "0.97.0",
".": "0.97.1",
"packages/vertex-sdk": "0.16.1",
"packages/bedrock-sdk": "0.29.2",
"packages/foundry-sdk": "0.2.3",
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## 0.97.1 (2026-05-19)

Full Changelog: [sdk-v0.97.0...sdk-v0.97.1](https://github.com/anthropics/anthropic-sdk-typescript/compare/sdk-v0.97.0...sdk-v0.97.1)

### Bug Fixes

* **runner:** skip tool calls SessionToolRunner does not own ([9987379](https://github.com/anthropics/anthropic-sdk-typescript/commit/9987379abeaf28b17edb5518ac229d2a6caa4bf6))

## 0.97.0 (2026-05-19)

Full Changelog: [sdk-v0.96.0...sdk-v0.97.0](https://github.com/anthropics/anthropic-sdk-typescript/compare/sdk-v0.96.0...sdk-v0.97.0)
Expand Down
10 changes: 8 additions & 2 deletions examples/managed-agents-observe-tool-calls.ts
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,14 @@ async function heartbeatLease(
/**
 * Prints a one-line summary of a dispatched tool call to stdout:
 * `tool <name>(<input>) -> <ok|error><note>`.
 *
 * The input is JSON-serialized and passed through `truncate` before printing.
 * The trailing note distinguishes the three delivery outcomes:
 *   - posted                      -> no note
 *   - not posted, no result built -> the call was skipped because this runner
 *                                    does not own the tool
 *   - not posted, result built    -> the post of the result failed
 *
 * @param call - The dispatched tool call yielded by the runner.
 */
function printCall(call: DispatchedToolCall): void {
  const input = truncate(JSON.stringify(call.event.input));
  const status = call.isError ? 'error' : 'ok';

  // A skipped unowned tool call has no result event and was deliberately
  // left pending for its owner (the other client servicing this split
  // session) — that is not a failed post, so it gets its own label.
  let note = '';
  if (!call.posted) {
    note =
      call.result === undefined ?
        ' [skipped — not owned by this runner]'
      : ' [result post failed]';
  }

  // Log exactly once per call; the note replaces the older two-state
  // "posted / result post failed" suffix.
  console.log(`tool ${call.name}(${input}) -> ${status}${note}`);
}

function requireEnv(name: string): string {
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@anthropic-ai/sdk",
"version": "0.97.0",
"version": "0.97.1",
"description": "The official TypeScript library for the Anthropic API",
"author": "Anthropic <support@anthropic.com>",
"types": "dist/index.d.ts",
Expand Down
85 changes: 52 additions & 33 deletions src/lib/tools/SessionToolRunner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,12 @@ export interface DispatchedToolCall {
* `user.tool_result` for an `agent.tool_use`, a `user.custom_tool_result` for
* an `agent.custom_tool_use`. Read `result.content` for the tool's output
* blocks and `result.is_error` for the error flag.
*
* `undefined` when no result event was ever built — i.e. the tool name is
* not one this runner owns and, under the split-client behavior, it
* deliberately posted nothing and left the id pending for its owner.
*/
readonly result: DispatchedToolResultParams;
readonly result?: DispatchedToolResultParams;
/**
* Flat convenience for `event.id` — the id of the tool-use event this result
* answers (echoed back as `tool_use_id` / `custom_tool_use_id` on the result).
Expand All @@ -117,14 +121,16 @@ export interface DispatchedToolCall {
/** Flat convenience for `event.name` — the dispatched tool's name. */
readonly name: string;
/**
* Flat convenience for `result.is_error` — `true` when the tool threw or was
* not found, `false` on success.
* Flat convenience for `result.is_error` — `true` when the tool threw,
* `false` on success and for a skipped unowned call.
*/
readonly isError: boolean;
/**
* Whether the `user.tool_result` was successfully posted back to the session.
* `false` when the post itself failed — typically a permanent 4xx or
* send-retry exhaustion.
* Whether a result event for this call reached the session. It is `false` in
* two distinct cases:
* 1. A `result` event was built but the post itself failed — typically a
* permanent 4xx or send-retry exhaustion.
* 2. The tool name is not one this runner owns, so (the split-client behavior)
* no `result` event was ever built, nothing was posted, and the id was left
* pending for its owner.
* Check `result === undefined` to tell case 2 apart from case 1.
*/
readonly posted: boolean;
}
Expand Down Expand Up @@ -451,35 +457,48 @@ export class SessionToolRunner implements AsyncIterable<DispatchedToolCall> {
this.#inFlightCount++;
try {
const tool = this.#toolByName.get(ev.name);
if (!tool) {
// Skip (split-client partial fulfilment): a name this runner
// is not registered for belongs to the other client servicing this
// session (typically the customer's app backend handling custom tools).
// Post NO result, do not mark it answered, and leave the tool_use_id
// pending for its owner — claiming it would corrupt the conversation.
// Still yield the call so the consumer can observe the unowned
// dispatch; nothing was sent, so `posted`/`isError` stay false and no
// `result` event is populated. The id stays unanswered, so reconcile
// keeps it out of the idle/end-turn accounting and re-surfaces it after
// a reconnect until its owner answers it.
this.#logger.info('tool not owned by this runner; leaving the tool_use_id pending for its owner', {
component: 'session-tool-runner',
session_id: this.sessionId,
tool: ev.name,
tool_use_id: ev.id,
});
this.#results.push({ event: ev, toolUseId: ev.id, name: ev.name, isError: false, posted: false });
return;
}
let content: string | Array<BetaToolResultContentBlockParam>;
let isError: boolean;
if (!tool) {
// Match `BetaToolRunner`'s wording — the string lands in the model's
// context, so the two `toolRunner()` surfaces should agree.
content = `Error: Tool '${ev.name}' not found`;
isError = true;
} else {
// Per-tool controller: aborts on the runner's own signal *or* the
// per-tool timeout, so an in-flight tool stops promptly when the runner
// is aborted instead of running until the timeout.
const toolCtrl = new AbortController();
const detachTool = linkAbort(this.#controller.signal, toolCtrl);
const timer = setTimeout(() => toolCtrl.abort(), TOOL_TIMEOUT_MS);
try {
// Pass the source `agent.tool_use` / `agent.custom_tool_use` event
// straight through as the run context's `toolUse` — it is a union
// member of `BetaToolUse`, no Messages-block adapter needed.
const outcome = await runRunnableTool(tool, ev.input, {
toolUse: ev,
toolUseBlock: ev,
signal: toolCtrl.signal,
});
content = outcome.content;
isError = outcome.isError;
} finally {
clearTimeout(timer);
detachTool();
}
// Per-tool controller: aborts on the runner's own signal *or* the
// per-tool timeout, so an in-flight tool stops promptly when the runner
// is aborted instead of running until the timeout.
const toolCtrl = new AbortController();
const detachTool = linkAbort(this.#controller.signal, toolCtrl);
const timer = setTimeout(() => toolCtrl.abort(), TOOL_TIMEOUT_MS);
try {
// Pass the source `agent.tool_use` / `agent.custom_tool_use` event
// straight through as the run context's `toolUse` — it is a union
// member of `BetaToolUse`, no Messages-block adapter needed.
const outcome = await runRunnableTool(tool, ev.input, {
toolUse: ev,
toolUseBlock: ev,
signal: toolCtrl.signal,
});
content = outcome.content;
isError = outcome.isError;
} finally {
clearTimeout(timer);
detachTool();
}
// Answer with the result event that matches the call kind: a
// `user.tool_result` for an `agent.tool_use`, a `user.custom_tool_result`
Expand Down
2 changes: 1 addition & 1 deletion src/version.ts
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export const VERSION = '0.97.0'; // x-release-please-version
export const VERSION = '0.97.1'; // x-release-please-version
109 changes: 97 additions & 12 deletions tests/lib/tools/SessionToolRunner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ describe('SessionToolRunner', () => {
expect(call.event.input).toEqual({ tz: 'UTC' });
expect(call.isError).toBe(false);
expect(call.posted).toBe(true);
expect(call.result.content).toEqual([{ type: 'text', text: 'noon' }]);
expect(call.result!.content).toEqual([{ type: 'text', text: 'noon' }]);

// The send carried a user.tool_result with matching tool_use_id.
const sentResults = calls.send.flat().filter((e) => e.type === 'user.tool_result');
Expand All @@ -177,18 +177,103 @@ describe('SessionToolRunner', () => {

expect(calls).toHaveLength(1);
expect(calls[0]!.isError).toBe(true);
expect(JSON.stringify(calls[0]!.result.content)).toMatch(/kaboom/);
expect(JSON.stringify(calls[0]!.result!.content)).toMatch(/kaboom/);
});

test('yields isError=true for an unknown tool name', async () => {
const { client } = makeFake({ streams: [[toolUse('tu_u', 'no_such_tool'), TERMINATED]] });
const runner = new SessionToolRunner('s', { client, tools: [], maxIdleMs: 0 });
// Default (skip-by-default): a self-hosted session is serviced by two
// clients — this runner (sandbox tools) and the customer's app backend
// (custom tools). A tool-use whose name this runner is not registered for
// belongs to the other client: the runner must post NO result, claim
// nothing, and leave the id pending — while still yielding the dispatched
// call so the consumer can observe it. A registered tool still runs.
test('skips a tool name it does not own (builtin and custom) and still runs a registered tool', async () => {
  // Count invocations of the one tool this runner actually registers.
  let echoRuns = 0;
  const echo = makeOkTool('echo', async () => {
    echoRuns++;
    return 'ran';
  });

  // One stream carrying two unowned calls (a builtin-style tool_use and a
  // custom_tool_use) followed by a call to the registered tool.
  const liveEvents = [
    toolUse('tu_x', 'not_ours'),
    customToolUse('ctu_y', 'app_backend_tool'),
    toolUse('tu_ok', 'echo'),
    TERMINATED,
  ];
  const { client, calls } = makeFake({ streams: [liveEvents] });
  const runner = new SessionToolRunner('s', { client, tools: [echo], maxIdleMs: 0 });

  // Draining the runner must not throw on the registry miss.
  const out: DispatchedToolCall[] = [];
  for await (const c of runner) out.push(c);

  expect(out).toHaveLength(3);

  // Both unowned calls are yielded for observation, but carry no result,
  // are not marked as errors, and were never posted.
  const unownedExpectations = [
    { call: out[0]!, name: 'not_ours', eventType: 'agent.tool_use' },
    { call: out[1]!, name: 'app_backend_tool', eventType: 'agent.custom_tool_use' },
  ];
  for (const { call, name, eventType } of unownedExpectations) {
    expect(call.name).toBe(name);
    expect(call.event.type).toBe(eventType);
    expect(call.isError).toBe(false);
    expect(call.posted).toBe(false);
    expect(call.result).toBeUndefined();
  }

  // The registered tool ran exactly once and its result was posted.
  const owned = out[2]!;
  expect(owned.name).toBe('echo');
  expect(owned.isError).toBe(false);
  expect(owned.posted).toBe(true);
  expect(owned.result!.content).toEqual([{ type: 'text', text: 'ran' }]);
  expect(echoRuns).toBe(1);

  // Nothing was posted for either unowned id — only the registered tool's
  // result reached the session.
  const sent = calls.send.flat();
  expect(sent).toHaveLength(1);
  const onlySent = sent[0]!;
  expect(onlySent.type).toBe('user.tool_result');
  expect(onlySent['tool_use_id']).toBe('tu_ok');
  const serialized = JSON.stringify(sent);
  expect(serialized).not.toContain('tu_x');
  expect(serialized).not.toContain('ctu_y');
});

// A skipped (unanswered) unowned tool_use must stay OUT of the end-turn
// accounting: reconcile sees history ending on an end_turn idle but with the
// unowned tool_use still unanswered, so it must NOT arm the idle countdown —
// the runner has not handled that call, its owner still has to.
test('a skipped unowned tool_use does not falsely trip the idle watchdog', async () => {
  // History ends on an end_turn idle, but the unowned tool_use before it is
  // still unanswered. The live stream is empty, so reconcile alone decides
  // whether the idle countdown gets armed.
  const { client, calls } = makeFake({
    streams: [[]], // no live events; reconcile drives the test
    list: [[toolUse('evt_pending', 'not_ours'), idleEndTurn()]],
  });
  const runner = new SessionToolRunner('s', { client, tools: [], maxIdleMs: 50 });
  const out: DispatchedToolCall[] = [];
  let finished = false;
  const consumer = (async () => {
    for await (const c of runner) out.push(c);
    finished = true;
  })();

  // Wait well past maxIdleMs (50). A wrongly-armed idle countdown would have
  // aborted the runner ~50ms in and resolved the consumer; a correct runner
  // keeps running because the unowned id is still pending its owner.
  await new Promise((r) => setTimeout(r, 300));
  expect(finished).toBe(false);

  // The unowned call is surfaced once as a skipped dispatch: not an error,
  // not posted, and with no result event built. (No `result.content`
  // assertion belongs here — there is no result to inspect.)
  expect(out).toHaveLength(1);
  const call = out[0]!;
  expect(call.toolUseId).toBe('evt_pending');
  expect(call.posted).toBe(false);
  expect(call.isError).toBe(false);
  expect(call.result).toBeUndefined();
  expect(calls.send).toHaveLength(0);

  // Explicit abort is what ends the run; the consumer must then settle.
  runner.abort();
  await consumer;
  expect(finished).toBe(true);
});

test('does not re-execute a tool whose result is already in history', async () => {
Expand Down Expand Up @@ -356,9 +441,9 @@ describe('SessionToolRunner', () => {
expect(call.event.input).toEqual({ order_id: 42 });
expect(call.isError).toBe(false);
expect(call.posted).toBe(true);
expect(call.result.type).toBe('user.custom_tool_result');
expect((call.result as { custom_tool_use_id?: string }).custom_tool_use_id).toBe('ctu_1');
expect(call.result.content).toEqual([{ type: 'text', text: 'shipped' }]);
expect(call.result!.type).toBe('user.custom_tool_result');
expect((call.result! as { custom_tool_use_id?: string }).custom_tool_use_id).toBe('ctu_1');
expect(call.result!.content).toEqual([{ type: 'text', text: 'shipped' }]);

// A custom tool call must be answered with user.custom_tool_result, never
// user.tool_result — the wrong type leaves the session hung.
Expand Down Expand Up @@ -437,7 +522,7 @@ describe('SessionToolRunner', () => {
expect(out).toHaveLength(1);
// Mapped to the Sessions search_result block shape, NOT a text block with
// a JSON.stringify of the original block.
expect(out[0]!.result.content).toEqual([
expect(out[0]!.result!.content).toEqual([
{
type: 'search_result',
source: 'https://example.com/doc',
Expand Down Expand Up @@ -468,7 +553,7 @@ describe('SessionToolRunner', () => {
const out: DispatchedToolCall[] = [];
for await (const c of runner) out.push(c);

expect(out[0]!.result.content).toEqual([
expect(out[0]!.result!.content).toEqual([
{
type: 'search_result',
source: 'https://example.com',
Expand Down
Loading