diff --git a/docs/mcp.md b/docs/mcp.md index 213be95c..b952640a 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -77,6 +77,7 @@ The Provar DX CLI ships with a built-in **Model Context Protocol (MCP) server** - [AI loop pattern](#ai-loop-pattern) - [Quality scores explained](#quality-scores-explained) - [API compatibility — `xml` vs `xml_content`](#api-compatibility--xml-vs-xml_content) +- [Performance Tuning](#performance-tuning) --- @@ -2102,3 +2103,80 @@ provar_nitrox_patch → apply targeted edits to an existing .po.json (RFC 7 ``` > **Note:** `provar_automation_*` and `provar_qualityhub_*` tools invoke `sf` CLI subprocesses. The Salesforce CLI must be installed and in `PATH`, or pass `sf_path` pointing to the executable directly (e.g. `~/.nvm/versions/node/v22.0.0/bin/sf`). A missing `sf` binary returns the error code `SF_NOT_FOUND` with an installation hint. + +--- + +## Performance Tuning + +These environment variables let you control agentic-loop safety and observability without modifying tool code. + +### Agentic loop guard (`PROVAR_MCP_MAX_TOOL_DEPTH`) + +Limits the number of Provar tool calls an AI agent may make within a single MCP session before the server starts returning errors instead of results. + +``` +PROVAR_MCP_MAX_TOOL_DEPTH=30 # allow at most 30 tool calls per session (default: 50) +``` + +Once the limit is reached, every further call returns: + +```json +{ + "error": "TOOL_BUDGET_EXCEEDED", + "callsMade": 30, + "limit": 30, + "suggestion": "Summarize progress and return control to the user." 
+} + ``` + +| Property | Value | +| --------- | -------------------------------------------------------------------------- | +| Default | `50` | +| Scope | Per MCP session (`sessionId` from the MCP SDK); calls that carry no `sessionId` (e.g. stdio transports) share a single `anon` bucket | +| Exemption | `provardx_ping` is never counted or blocked | +| Memory | Sessions are tracked in-process; restarting the server resets all counters | + +The guard is designed to prevent runaway agentic loops from making hundreds of tool calls without human review. Set it lower (e.g. `10`) for tightly supervised workflows; raise it for long-running automation pipelines where you trust the agent. Omitting the variable, or setting it to a value that does not parse as a positive integer, applies the default of `50` — it does not disable the guard. + +### Per-call token attribution (`PROVAR_MCP_EMIT_TOKEN_META`) + +Appends a `_meta` object to `structuredContent` on every tool response, giving observability tooling a lightweight token-cost signal per call. + +``` +PROVAR_MCP_EMIT_TOKEN_META=true +``` + +When enabled, `structuredContent` gains a `_meta` key: + +```json +{ + "result": "...", + "_meta": { + "tool": "provar_project_inspect", + "detailLevel": "standard", + "estimatedTokens": 412 + } +} +``` + +On `TOOL_BUDGET_EXCEEDED` errors the meta also includes the session cumulative total: + +```json +{ + "_meta": { + "tool": "provar_project_inspect", + "detailLevel": "standard", + "estimatedTokens": 38, + "sessionTotalEstimatedTokens": 8204 + } +} +``` + +| Field | Description | +| ----------------------------- | -------------------------------------------------------------------------------------------- | +| `tool` | Name of the tool that produced this response | +| `detailLevel` | Value of the `detail` argument passed by the caller (`"summary"`, `"standard"`, or `"full"`); `"standard"` when the argument is omitted | +| `estimatedTokens` | `ceil(len(JSON.stringify(response)) / 4)` — a rough character-to-token estimate | +| `sessionTotalEstimatedTokens` | Cumulative estimate for the session; only present on budget-exceeded errors | + +> **Implementation note:** `_meta` is intentionally placed only in `structuredContent`, never in `content[0].text`. 
LLM clients read `content[0].text`; including observability data there would waste tokens on every response. diff --git a/src/mcp/server.ts b/src/mcp/server.ts index c0eada46..75c17c0d 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -35,6 +35,12 @@ import { registerAllNitroXTools } from './tools/nitroXTools.js'; import { registerAllTestCaseStepTools } from './tools/testCaseStepTools.js'; import { registerAllConnectionTools } from './tools/connectionTools.js'; import { registerAllPrompts } from './prompts/index.js'; +import { + createDepthGuardState, + wrapWithDepthGuard, + type AnyToolCallback, + type DepthGuardState, +} from './utils/tokenMeta.js'; import { desc } from './tools/descHelper.js'; // ── Tool group registry ─────────────────────────────────────────────────────── @@ -129,6 +135,12 @@ export function createProvarMcpServer(config: ServerConfig): McpServer { } ); + // ── Depth-guard middleware (PDX-474) ───────────────────────────────────────── + const rawLimit = parseInt(process.env['PROVAR_MCP_MAX_TOOL_DEPTH'] ?? '50', 10); + const depthLimit = Number.isNaN(rawLimit) || rawLimit <= 0 ? 50 : rawLimit; + const depthState = createDepthGuardState(); + patchWithMiddleware(server, depthState, depthLimit); + // ── Provar tools ───────────────────────────────────────────────────────────── const activeGroups = parseActiveGroups(); for (const [group, registrars] of Object.entries(TOOL_GROUPS)) { @@ -254,6 +266,15 @@ export function createProvarMcpServer(config: ServerConfig): McpServer { return server; } +function patchWithMiddleware(server: McpServer, state: DepthGuardState, limit: number): void { + const orig = server.registerTool.bind(server); + type RegisterToolFn = (n: string, c: unknown, h: AnyToolCallback) => unknown; + // Cast through unknown to patch the overloaded method without triggering no-unsafe-any. 
/*
 * Copyright (c) 2024 Provar Limited.
 * All rights reserved.
 * Licensed under the BSD 3-Clause license.
 * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 */

// --------------------------------------------------------------------------- //
// Minimal structural types — avoids importing SDK internal paths.
// --------------------------------------------------------------------------- //

type ContentItem = { type: 'text'; text: string };

/** Shape of a tool response as far as this module needs to know it. */
export interface ToolResult {
  content: ContentItem[];
  structuredContent?: Record<string, unknown>;
  isError?: boolean;
}

/** Per-request context; only the session identifier is read here. */
interface ToolExtra {
  sessionId?: string;
}

/** A tool handler: receives the call arguments and the request context. */
export type AnyToolCallback = (
  args: Record<string, unknown>,
  extra: ToolExtra
) => ToolResult | Promise<ToolResult>;

// --------------------------------------------------------------------------- //
// PDX-474 — Depth Guard (PROVAR_MCP_MAX_TOOL_DEPTH)
// --------------------------------------------------------------------------- //

/** Counters kept for one session. */
interface SessionEntry {
  /** Number of handler invocations that were allowed through. */
  calls: number;
  /** Sum of `estimateTokens(result)` over allowed calls (only grows while token meta is enabled). */
  totalEstimatedTokens: number;
}

/** Session id → counters. Iteration order doubles as recency order (least recent first). */
export type DepthGuardState = Map<string, SessionEntry>;

/** Upper bound on tracked sessions, so the map cannot grow without limit. */
const MAX_SESSIONS = 1000;

/** Bucket shared by every call that carries no session id. */
const ANON_SESSION_ID = 'anon';

/** `detailLevel` reported when the caller passes no string `detail` argument. */
const DEFAULT_DETAIL_LEVEL = 'standard';

/** Environment variable that switches `_meta` emission on (exact value `'true'`). */
const TOKEN_META_ENV = 'PROVAR_MCP_EMIT_TOKEN_META';

/** Reads the flag on every call so it can be toggled at runtime (the unit tests rely on this). */
function isTokenMetaEnabled(): boolean {
  return process.env[TOKEN_META_ENV] === 'true';
}

/** Creates an empty session-tracking map. */
export function createDepthGuardState(): DepthGuardState {
  return new Map<string, SessionEntry>();
}

/**
 * Returns the counters for `sessionId`, creating them on first use.
 *
 * Every access moves the session to the "most recent" end of the map, and when
 * the map is full the least recently used session is dropped. Evicting by
 * insertion order instead would let a busy session (or the shared anon bucket)
 * be evicted while still active, silently resetting its budget.
 */
function getOrCreateEntry(state: DepthGuardState, sessionId: string): SessionEntry {
  const known = state.get(sessionId);
  if (known !== undefined) {
    // Map iterates in insertion order, so delete + set marks this session as most recent.
    state.delete(sessionId);
    state.set(sessionId, known);
    return known;
  }

  if (state.size >= MAX_SESSIONS) {
    const leastRecent = state.keys().next();
    if (!leastRecent.done) state.delete(leastRecent.value);
  }

  const created: SessionEntry = { calls: 0, totalEstimatedTokens: 0 };
  state.set(sessionId, created);
  return created;
}

/**
 * Wraps a tool handler to enforce a per-session call budget.
 *
 * Once `limit` calls have been let through for a session, every further call
 * returns a `TOOL_BUDGET_EXCEEDED` error result without invoking `handler`.
 * Calls without a `sessionId` share the single `'anon'` bucket, so the budget
 * still limits clients that never send one.
 *
 * NOTE(review): this module has no exemption list. Any exemption (for example
 * `provardx_ping`) has to come from how and when the caller applies the
 * wrapper — confirm in server.ts.
 *
 * @param toolName - Name reported in `_meta.tool`.
 * @param handler - The real tool implementation; its errors propagate unchanged.
 * @param state - Shared session counters, see {@link createDepthGuardState}.
 * @param limit - Maximum number of allowed calls per session.
 * @returns A handler with the same call signature as `handler`.
 */
export function wrapWithDepthGuard(
  toolName: string,
  handler: AnyToolCallback,
  state: DepthGuardState,
  limit: number
): AnyToolCallback {
  return async (args: Record<string, unknown>, extra?: ToolExtra): Promise<ToolResult> => {
    // NOTE(review): a missing context object is tolerated (counted in the anon
    // bucket) instead of throwing. Some MCP SDK versions appear to invoke
    // schema-less tool callbacks with a single argument — confirm against the SDK in use.
    const sessionId = extra?.sessionId ?? ANON_SESSION_ID;
    const entry = getOrCreateEntry(state, sessionId);

    // Resolved once so allowed and rejected responses report the same level.
    const requestedDetail = args['detail'];
    const detailLevel = typeof requestedDetail === 'string' ? requestedDetail : DEFAULT_DETAIL_LEVEL;

    if (entry.calls >= limit) {
      const payload = {
        error: 'TOOL_BUDGET_EXCEEDED',
        callsMade: entry.calls,
        limit,
        suggestion: 'Summarize progress and return control to the user.',
      };
      const rejection: ToolResult = {
        isError: true,
        content: [{ type: 'text' as const, text: JSON.stringify(payload) }],
        structuredContent: payload,
      };
      return attachMeta(rejection, toolName, detailLevel, entry.totalEstimatedTokens);
    }

    // Counted before awaiting, so concurrent calls cannot overshoot the limit
    // and a throwing handler still consumes budget.
    entry.calls++;
    // Arguments are forwarded untouched so the handler sees exactly what the caller passed.
    const result = await handler(args, extra as ToolExtra);

    if (isTokenMetaEnabled()) {
      entry.totalEstimatedTokens += estimateTokens(result);
    }

    return attachMeta(result, toolName, detailLevel);
  };
}

// --------------------------------------------------------------------------- //
// PDX-475 — Token meta attachment (PROVAR_MCP_EMIT_TOKEN_META)
// --------------------------------------------------------------------------- //

/**
 * Rough token estimate: `ceil(JSON.stringify(payload).length / 4)`.
 *
 * Returns `0` when the payload has no JSON representation (`undefined`, a
 * function, a symbol) or cannot be serialized (circular reference, BigInt).
 * The figure is observability data, so a failure to estimate must not turn
 * into a failed tool call.
 */
export function estimateTokens(payload: unknown): number {
  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(payload);
  } catch {
    return 0;
  }
  return serialized === undefined ? 0 : Math.ceil(serialized.length / 4);
}

/**
 * Appends a `_meta` key to `structuredContent` when PROVAR_MCP_EMIT_TOKEN_META=true;
 * otherwise returns `response` itself, untouched.
 *
 * `content` is never modified — only `structuredContent` gains the extra key,
 * and existing `structuredContent` keys are preserved. The input object is not
 * mutated; a shallow copy is returned.
 *
 * @param response - Tool result to annotate.
 * @param toolName - Stored as `_meta.tool`.
 * @param detailLevel - Stored as `_meta.detailLevel`.
 * @param sessionTotalTokens - Cumulative estimate for the session; when given it
 *   is stored as `_meta.sessionTotalEstimatedTokens` (the depth guard passes it
 *   only on TOOL_BUDGET_EXCEEDED errors).
 */
export function attachMeta(
  response: ToolResult,
  toolName: string,
  detailLevel: string,
  sessionTotalTokens?: number
): ToolResult {
  if (!isTokenMetaEnabled()) return response;

  const meta: Record<string, unknown> = {
    tool: toolName,
    detailLevel,
    estimatedTokens: estimateTokens(response),
  };
  if (sessionTotalTokens !== undefined) {
    meta['sessionTotalEstimatedTokens'] = sessionTotalTokens;
  }

  return {
    ...response,
    structuredContent: { ...(response.structuredContent ?? {}), _meta: meta },
  };
}
/*
 * Copyright (c) 2024 Provar Limited.
 * All rights reserved.
 * Licensed under the BSD 3-Clause license.
 * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause
 */

import { strict as assert } from 'node:assert';
import {
  createDepthGuardState,
  wrapWithDepthGuard,
  attachMeta,
  estimateTokens,
  type ToolResult,
  type AnyToolCallback,
} from '../../../src/mcp/utils/tokenMeta.js';

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

const TOKEN_META_ENV = 'PROVAR_MCP_EMIT_TOKEN_META';

/** Builds a handler that always answers with `response`. */
function makeHandler(response: ToolResult): AnyToolCallback {
  return () => response;
}

const okResponse: ToolResult = {
  content: [{ type: 'text', text: '{"ok":true}' }],
  structuredContent: { ok: true },
};

const errResponse: ToolResult = {
  isError: true,
  content: [{ type: 'text', text: '{"error":"oops"}' }],
  structuredContent: { error: 'oops' },
};

/**
 * Sets (or, for `undefined`, removes) the token-meta env var and returns a
 * function that puts back whatever was there before, so no test leaks env
 * state into later suites.
 */
function overrideTokenMetaEnv(value: string | undefined): () => void {
  const previous = process.env[TOKEN_META_ENV];
  if (value === undefined) {
    delete process.env[TOKEN_META_ENV];
  } else {
    process.env[TOKEN_META_ENV] = value;
  }
  return () => {
    if (previous === undefined) {
      delete process.env[TOKEN_META_ENV];
    } else {
      process.env[TOKEN_META_ENV] = previous;
    }
  };
}

/** Runs a synchronous `fn` with token meta switched on or off, then restores the env var. */
function withMeta(enabled: boolean, fn: () => void): void {
  const restore = overrideTokenMetaEnv(enabled ? 'true' : 'false');
  try {
    fn();
  } finally {
    restore();
  }
}

/** Returns `structuredContent._meta`, failing the test if it is missing. */
function metaOf(result: ToolResult): Record<string, unknown> {
  const meta = result.structuredContent?.['_meta'];
  assert.ok(typeof meta === 'object' && meta !== null, '_meta should be present');
  return meta as Record<string, unknown>;
}

/** Parses the JSON text of the first content item. */
function bodyOf(result: ToolResult): Record<string, unknown> {
  return JSON.parse(result.content[0].text) as Record<string, unknown>;
}

// ---------------------------------------------------------------------------
// wrapWithDepthGuard
// ---------------------------------------------------------------------------

describe('wrapWithDepthGuard', () => {
  it('allows calls up to the limit', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, 3);
    const extra = { sessionId: 'sess-1' };
    const results = await Promise.all([wrapped({}, extra), wrapped({}, extra), wrapped({}, extra)]);
    for (const result of results) {
      assert.strictEqual(result.isError, undefined);
    }
  });

  it('fires TOOL_BUDGET_EXCEEDED on the call that exceeds the limit', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, 2);
    const extra = { sessionId: 'sess-budget' };
    await Promise.all([wrapped({}, extra), wrapped({}, extra)]);
    const result = await wrapped({}, extra);
    assert.strictEqual(result.isError, true);
    const body = bodyOf(result);
    assert.strictEqual(body['error'], 'TOOL_BUDGET_EXCEEDED');
    assert.strictEqual(body['callsMade'], 2);
    assert.strictEqual(body['limit'], 2);
    assert.ok(typeof body['suggestion'] === 'string' && body['suggestion'].length > 0);
  });

  it('blocks all subsequent calls once limit is exceeded', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, 1);
    const extra = { sessionId: 'sess-block' };
    await wrapped({}, extra);
    const [r1, r2] = await Promise.all([wrapped({}, extra), wrapped({}, extra)]);
    assert.strictEqual(r1.isError, true);
    assert.strictEqual(r2.isError, true);
  });

  it('tracks sessions independently', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, 1);
    await wrapped({}, { sessionId: 'sess-A' });
    const [resultA, resultB] = await Promise.all([
      wrapped({}, { sessionId: 'sess-A' }),
      wrapped({}, { sessionId: 'sess-B' }),
    ]);
    assert.strictEqual(resultA.isError, true);
    assert.strictEqual(resultB.isError, undefined);
  });

  it('shares a single anon bucket across calls when sessionId is absent', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, 1);
    // Stdio transports (Claude Desktop, Cursor) don't pass a sessionId — all such
    // calls must share one bucket so the budget actually limits runaway tool use.
    await wrapped({}, {});
    const blocked = await wrapped({}, {});
    assert.strictEqual(blocked.isError, true);
    assert.strictEqual(bodyOf(blocked)['error'], 'TOOL_BUDGET_EXCEEDED');
  });

  it('keeps named sessions independent from the anon bucket', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, 1);
    await wrapped({}, {}); // anon bucket uses its 1 call
    const namedResult = await wrapped({}, { sessionId: 'sess-named' });
    assert.strictEqual(namedResult.isError, undefined);
  });

  it('includes a non-empty suggestion in TOOL_BUDGET_EXCEEDED', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, 0);
    const result = await wrapped({}, { sessionId: 'sess-hint' });
    const body = bodyOf(result);
    assert.ok(typeof body['suggestion'] === 'string' && body['suggestion'].length > 10);
  });

  it('evicts the oldest session when MAX_SESSIONS (1000) is reached', async () => {
    const state = createDepthGuardState();
    const limit = 1;
    const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, limit);

    // Fill up to 1000 sessions
    await Promise.all(Array.from({ length: 1000 }, (_, i) => wrapped({}, { sessionId: `fill-${i}` })));
    assert.strictEqual(state.size, 1000);

    // Adding a 1001st session should evict the oldest (fill-0).
    await wrapped({}, { sessionId: 'newcomer' });
    assert.strictEqual(state.size, 1000);
    assert.strictEqual(state.has('fill-0'), false);
    assert.strictEqual(state.has('newcomer'), true);
  });
});

// ---------------------------------------------------------------------------
// attachMeta
// ---------------------------------------------------------------------------

describe('attachMeta', () => {
  it('attaches _meta when PROVAR_MCP_EMIT_TOKEN_META=true', () => {
    withMeta(true, () => {
      const meta = metaOf(attachMeta(okResponse, 'my_tool', 'standard'));
      assert.strictEqual(meta['tool'], 'my_tool');
      assert.strictEqual(meta['detailLevel'], 'standard');
      assert.ok(typeof meta['estimatedTokens'] === 'number' && meta['estimatedTokens'] > 0);
    });
  });

  it('returns response unchanged when PROVAR_MCP_EMIT_TOKEN_META is not "true"', () => {
    withMeta(false, () => {
      const result = attachMeta(okResponse, 'my_tool', 'standard');
      assert.strictEqual(result, okResponse);
    });
  });

  it('returns response unchanged when env var is absent', () => {
    const restore = overrideTokenMetaEnv(undefined);
    try {
      const result = attachMeta(okResponse, 'my_tool', 'standard');
      assert.strictEqual(result, okResponse);
    } finally {
      restore();
    }
  });

  it('attaches _meta on error responses', () => {
    withMeta(true, () => {
      const meta = metaOf(attachMeta(errResponse, 'my_tool', 'full'));
      assert.strictEqual(meta['detailLevel'], 'full');
    });
  });

  it('includes sessionTotalEstimatedTokens when provided', () => {
    withMeta(true, () => {
      const meta = metaOf(attachMeta(okResponse, 'my_tool', 'standard', 999));
      assert.strictEqual(meta['sessionTotalEstimatedTokens'], 999);
    });
  });

  it('does not include sessionTotalEstimatedTokens when not provided', () => {
    withMeta(true, () => {
      const meta = metaOf(attachMeta(okResponse, 'my_tool', 'standard'));
      assert.strictEqual('sessionTotalEstimatedTokens' in meta, false);
    });
  });

  it('does not modify content[0].text', () => {
    withMeta(true, () => {
      const result = attachMeta(okResponse, 'my_tool', 'standard');
      assert.strictEqual(result.content[0].text, okResponse.content[0].text);
    });
  });

  it('estimated_tokens is within ±50% of actual JSON length / 4', () => {
    withMeta(true, () => {
      const meta = metaOf(attachMeta(okResponse, 'my_tool', 'standard'));
      const estimate = meta['estimatedTokens'] as number;
      const actual = Math.ceil(JSON.stringify(okResponse).length / 4);
      assert.ok(
        estimate >= actual * 0.5 && estimate <= actual * 1.5,
        `estimate ${estimate} should be within ±50% of ${actual}`
      );
    });
  });
});

// ---------------------------------------------------------------------------
// estimateTokens
// ---------------------------------------------------------------------------

describe('estimateTokens', () => {
  it('returns a positive integer', () => {
    const tokens = estimateTokens({ hello: 'world' });
    assert.ok(Number.isInteger(tokens) && tokens > 0);
  });

  it('returns ceil(len/4) of JSON string', () => {
    const obj = { a: 1 };
    const expected = Math.ceil(JSON.stringify(obj).length / 4);
    assert.strictEqual(estimateTokens(obj), expected);
  });
});

// ---------------------------------------------------------------------------
// Integration: wrapWithDepthGuard + attachMeta
// ---------------------------------------------------------------------------

describe('integration: wrapWithDepthGuard + attachMeta', () => {
  // Restores the value the env var had before each test, rather than deleting
  // it outright, so a value set by the surrounding environment survives the suite.
  let restoreEnv: () => void = () => undefined;

  beforeEach(() => {
    restoreEnv = overrideTokenMetaEnv('true');
  });
  afterEach(() => {
    restoreEnv();
  });

  it('attaches _meta on successful tool call', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 50);
    const result = await wrapped({}, { sessionId: 'int-1' });
    assert.strictEqual(metaOf(result)['tool'], 'my_tool');
  });

  it('attaches _meta on TOOL_BUDGET_EXCEEDED error', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 0);
    const result = await wrapped({}, { sessionId: 'int-err' });
    assert.strictEqual(result.isError, true);
    assert.ok('sessionTotalEstimatedTokens' in metaOf(result));
  });

  it('uses detail arg from args when present', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 50);
    const result = await wrapped({ detail: 'summary' }, { sessionId: 'int-detail' });
    assert.strictEqual(metaOf(result)['detailLevel'], 'summary');
  });

  it('defaults detail_level to "standard" when detail arg is absent', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 50);
    const result = await wrapped({}, { sessionId: 'int-nodetail' });
    assert.strictEqual(metaOf(result)['detailLevel'], 'standard');
  });

  it('preserves existing structuredContent keys alongside _meta', async () => {
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 50);
    const result = await wrapped({}, { sessionId: 'int-preserve' });
    const sc = result.structuredContent as Record<string, unknown>;
    assert.strictEqual(sc['ok'], true);
    assert.ok(sc['_meta']);
  });

  it('does not attach _meta when env var is disabled', async () => {
    delete process.env[TOKEN_META_ENV];
    const state = createDepthGuardState();
    const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 50);
    const result = await wrapped({}, { sessionId: 'int-disabled' });
    const sc = result.structuredContent as Record<string, unknown>;
    assert.strictEqual('_meta' in sc, false);
  });

  it('propagates handler errors', async () => {
    const state = createDepthGuardState();
    const throwingHandler: AnyToolCallback = () => {
      throw new Error('handler blew up');
    };
    const wrapped = wrapWithDepthGuard('my_tool', throwingHandler, state, 50);
    await assert.rejects(async () => wrapped({}, { sessionId: 'int-throw' }), /handler blew up/);
  });
});