diff --git a/docs/mcp-pilot-guide.md b/docs/mcp-pilot-guide.md index 830135ef..bf7e0073 100644 --- a/docs/mcp-pilot-guide.md +++ b/docs/mcp-pilot-guide.md @@ -453,6 +453,8 @@ NitroX is Provar's Hybrid Model for locators — it maps Salesforce component-ba If a pilot LLM falls into the multi-call pattern despite the description contract, the runtime guard converts the failure into an actionable error rather than a silently broken file on disk. +**Title-level contract:** the chip-level `title` fields for the two tools — `Generate Test Case (full steps in one call)` and `Amend Existing Test Case Step` — carry the construct-vs-amend split at the tool-picker surface. MCP clients that render only the title (Claude Desktop tool-picker chips, Cursor audit pane, inline tool-call references in chat threads) still expose the contract to the agent before any description is read. + **Prompt:** > "Create a Provar test case `AccountFlow.testcase` that covers three scenarios: diff --git a/docs/mcp.md b/docs/mcp.md index c935cc2d..33d23421 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -703,6 +703,8 @@ Validates a Java Page Object source file against 30+ quality rules (structural c Generates an XML test case skeleton with UUID v4 guids and sequential `testItemId` values. +The tool's chip-level `title` — `Generate Test Case (full steps in one call)` — carries the construction contract so that MCP clients which render only the title (Claude Desktop tool-picker chips, Cursor audit pane, inline tool-call references) surface the single-call requirement to the agent before any description is read. + > **Construction pattern (read first).** Pass the FULL step tree for the test case in a single call via the `steps[]` array. Do **not** call this tool with `steps: []` and then append steps via repeated `provar_testcase_step_edit` calls — that pattern drops scenarios, flattens nesting, and produces inconsistent step types. `provar_testcase_step_edit` is for **amending** an already-validated test case (single-step add, attribute fix, debug edit), not for **constructing** one from scratch. **Generated `` element structure (Provar requirements):** @@ -1559,6 +1561,8 @@ Salesforce DML error categories (`SALESFORCE_*`) represent test-data failures Atomically add or remove a single step (``) in a Provar XML test case file. Writes a `.bak` backup before mutating, runs structural validation after the edit, and automatically restores the backup if validation fails. +The tool's chip-level `title` — `Amend Existing Test Case Step` — signals the amendment-only contract in MCP clients that render only the title (Claude Desktop tool-picker chips, Cursor audit pane, inline tool-call references). An agent that reads only the title still sees that this tool operates on an existing test case, not a new one. + > **When to use.** This tool is for **amending** an existing, already-validated test case (single-step add, attribute fix, debug edit). It is **not** for constructing a test case from scratch by calling it repeatedly after a `steps: []` `provar_testcase_generate`. Building a case step-by-step via repeated `step_edit` calls produces structurally invalid test cases (dropped scenarios, flat asserts, inconsistent step types). For new test cases, pass the full step tree to `provar_testcase_generate` in a single call. Prerequisites: the test case file must exist and be valid XML with a `` structure. diff --git a/scripts/pdx-482-validate.cjs b/scripts/pdx-482-validate.cjs index 8685e9d6..01537deb 100644 --- a/scripts/pdx-482-validate.cjs +++ b/scripts/pdx-482-validate.cjs @@ -1,13 +1,13 @@ -// PDX-482 / PDX-483 validation: confirm the construct/amend contract is reachable -// at the MCP protocol surface and that the PDX-483 runtime guard rejects the -// PDX-479 multi-call pattern shape. +// PDX-482 / PDX-483 / PDX-484 validation: confirm the construct/amend contract +// is reachable at every MCP protocol surface the LLM sees, and that the runtime +// guard rejects the multi-call construction shape. // -// PDX-482 (standard + compact modes): assertions on tools/list — every byte the -// LLM literally sees at the call site. Compact mode coverage is critical because +// PDX-482 — description contract (standard + compact schema modes): assertions +// on tools/list description bodies. Compact mode coverage is critical because // the adversarial review identified that PROVAR_MCP_SCHEMA_MODE=compact silently // swapped the description for a contract-free one-liner. // -// PDX-483 (runtime-guard mode): drives a real tools/call with the rejected shape +// PDX-483 — runtime guard: drives a real tools/call with the rejected shape // (steps:[]+dry_run:false+output_path) and asserts the response is a structured // STEPS_REQUIRED error with a non-empty details.suggestion. This catches a // regression class that the tools/list assertions cannot reach: the passive @@ -15,6 +15,11 @@ // regresses (e.g. a refactor reorders the handler so writes happen before the // check). // +// PDX-484 — title contract: assertions on the `title:` field that some clients +// render exclusively (Claude Desktop chips, Cursor audit pane, inline tool-call +// refs). Titles are schema-mode-independent but we assert in both passes to +// surface drift early either way. +// // yarn compile // node scripts/pdx-482-validate.cjs @@ -105,6 +110,43 @@ function runValidation(mode, extraEnv, runAssertions) { }); } +// ── PDX-484: title-level construct-vs-amend contract ─────────────────────── +// Title field is independent of schema mode, but we assert it in both passes +// to catch drift early regardless of which mode a future refactor breaks. +function titleAssertions(toolList, record) { + const gen = toolList.find((t) => t.name === 'provar_testcase_generate'); + if (!gen) { + record('provar_testcase_generate has a title', false, 'tool not found'); + } else { + const t = gen.title ?? ''; + record( + 'generate.title carries "one call" or "single call" (PDX-484)', + t.includes('one call') || t.includes('single call'), + `title: ${JSON.stringify(t)}` + ); + record('generate.title mentions steps (PDX-484)', /step/i.test(t), 'chip-level payload shape must be visible'); + record('generate.title length ≤ 50 chars (PDX-484)', t.length <= 50, `length: ${t.length}`); + } + + const edit = toolList.find((t) => t.name === 'provar_testcase_step_edit'); + if (!edit) { + record('provar_testcase_step_edit has a title', false, 'tool not found'); + } else { + const t = edit.title ?? ''; + record( + 'step_edit.title contains "Amend" or "amendment" (PDX-484)', + /amend/i.test(t), + `title: ${JSON.stringify(t)}` + ); + record( + 'step_edit.title signals "existing" test case only (PDX-484)', + /exist/i.test(t), + 'chip-level signal that this tool does not construct new cases' + ); + record('step_edit.title length ≤ 50 chars (PDX-484)', t.length <= 50, `length: ${t.length}`); + } +} + // ── Assertions for standard mode (full TOOL_DESCRIPTION) ──────────────────── function standardAssertions(toolList, record) { const gen = toolList.find((t) => t.name === 'provar_testcase_generate'); @@ -197,6 +239,9 @@ function standardAssertions(toolList, record) { 'consequence is explicit so the contract is judgement-friendly' ); } + + // PDX-484: title-level contract — runs in both modes to surface drift. + titleAssertions(toolList, record); } // ── Assertions for compact mode (short one-liner) ─────────────────────────── @@ -246,6 +291,9 @@ function compactAssertions(toolList, record) { 'rejection must survive compact mode' ); } + + // PDX-484: title-level contract — runs in both modes to surface drift. + titleAssertions(toolList, record); } // ── PDX-483 runtime guard: tools/call assertion ───────────────────────────── diff --git a/src/mcp/tools/testCaseGenerate.ts b/src/mcp/tools/testCaseGenerate.ts index 3249cb3d..21ee6a70 100644 --- a/src/mcp/tools/testCaseGenerate.ts +++ b/src/mcp/tools/testCaseGenerate.ts @@ -167,7 +167,13 @@ export function registerTestCaseGenerate(server: McpServer, config: ServerConfig server.registerTool( 'provar_testcase_generate', { - title: 'Generate Test Case', + // PDX-484: carry the construct-vs-amend contract into the `title:` field + // because many MCP clients (Claude Desktop tool-picker chips, Cursor audit + // pane, inline tool-call references in chat threads) render only the title. + // Without the "(full steps in one call)" suffix an agent that reads only + // the title surface gets zero PDX-479 protection. Length: 43 chars — + // well under the ~50 char comfort threshold for the clients we test. + title: 'Generate Test Case (full steps in one call)', description: desc( TOOL_DESCRIPTION, // PDX-482: the compact form must also carry the construction contract, diff --git a/src/mcp/tools/testCaseStepTools.ts b/src/mcp/tools/testCaseStepTools.ts index c4e5ecc1..bafb4e7d 100644 --- a/src/mcp/tools/testCaseStepTools.ts +++ b/src/mcp/tools/testCaseStepTools.ts @@ -86,7 +86,11 @@ export function registerTestCaseStepEdit(server: McpServer, config: ServerConfig server.registerTool( 'provar_testcase_step_edit', { - title: 'Edit Test Case Step', + // PDX-484: carry the AMENDMENT-ONLY contract into the `title:` field. + // "Amend" mirrors the AMENDMENT-ONLY framing in the description body + // and "Existing" signals that the tool does not construct new test cases. + // Length: 29 chars — well within the chip-render comfort threshold. + title: 'Amend Existing Test Case Step', description: desc( [ // ── Usage contract (READ FIRST — PDX-482) ───────────────────────────── diff --git a/test/unit/mcp/testCaseGenerate.test.ts b/test/unit/mcp/testCaseGenerate.test.ts index af65578d..95c7579b 100644 --- a/test/unit/mcp/testCaseGenerate.test.ts +++ b/test/unit/mcp/testCaseGenerate.test.ts @@ -21,7 +21,9 @@ import type { ServerConfig } from '../../../src/mcp/server.js'; type ToolHandler = (args: Record) => unknown; class MockMcpServer { - public registrations: Array<{ name: string; description: string }> = []; + // PDX-484: capture `title` alongside `description` so tests can assert on the + // title-level contract. Many MCP clients render only the title field. + public registrations: Array<{ name: string; description: string; title: string }> = []; private handlers = new Map(); public tool(name: string, _description: string, _schema: unknown, handler: ToolHandler): void { @@ -30,8 +32,16 @@ class MockMcpServer { public registerTool(name: string, config: unknown, handler: ToolHandler): void { this.handlers.set(name, handler); - const desc = (config as Record)['description']; - if (typeof desc === 'string') this.registrations.push({ name, description: desc }); + const cfg = config as Record; + const desc = cfg['description']; + const title = cfg['title']; + if (typeof desc === 'string') { + this.registrations.push({ + name, + description: desc, + title: typeof title === 'string' ? title : '', + }); + } } public call(name: string, args: Record): ReturnType { @@ -151,6 +161,35 @@ describe('provar_testcase_generate description', () => { ); }); + // ── PDX-484: title-level construct-vs-amend contract ────────────────────── + // Many MCP clients (Claude Desktop tool-picker chips, Cursor audit pane, + // inline tool-call references in chat threads) render only the `title` + // field. Without the contract in the title an agent that reads only that + // surface gets zero PDX-479 protection. These assertions lock the title to + // the canonical phrasing chosen during the PDX-484 cross-client pilot. + + it('title carries the single-call construction contract (PDX-484)', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.title.includes('one call') || reg.title.includes('single call'), + 'title must contain "one call" or "single call" so the contract is visible in tool-picker chips' + ); + assert.ok( + /step/i.test(reg.title), + 'title must mention steps so the LLM sees the payload shape at the chip-level surface' + ); + }); + + it('title fits the cross-client chip-render comfort threshold (≤50 chars, PDX-484)', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.title.length <= 50, + `title length ${reg.title.length} exceeds 50 chars — Cursor and other clients may truncate` + ); + }); + // ── PDX-482 hardening: compact-mode coverage (adversarial review fix) ────── // PROVAR_MCP_SCHEMA_MODE=compact swaps the entire description for a short // one-liner. Without this guard, compact mode is a regression highway: diff --git a/test/unit/mcp/testCaseStepTools.test.ts b/test/unit/mcp/testCaseStepTools.test.ts index 17ca038b..c353d264 100644 --- a/test/unit/mcp/testCaseStepTools.test.ts +++ b/test/unit/mcp/testCaseStepTools.test.ts @@ -18,7 +18,9 @@ import { registerAllTestCaseStepTools } from '../../../src/mcp/tools/testCaseSte type ToolHandler = (args: Record) => unknown; class MockMcpServer { - public registrations: Array<{ name: string; description: string }> = []; + // PDX-484: capture `title` alongside `description` so tests can assert on the + // title-level contract. Many MCP clients render only the title field. + public registrations: Array<{ name: string; description: string; title: string }> = []; private handlers = new Map(); public tool(name: string, _desc: string, _schema: unknown, handler: ToolHandler): void { @@ -27,8 +29,16 @@ class MockMcpServer { public registerTool(name: string, config: unknown, handler: ToolHandler): void { this.handlers.set(name, handler); - const desc = (config as Record)['description']; - if (typeof desc === 'string') this.registrations.push({ name, description: desc }); + const cfg = config as Record; + const desc = cfg['description']; + const title = cfg['title']; + if (typeof desc === 'string') { + this.registrations.push({ + name, + description: desc, + title: typeof title === 'string' ? title : '', + }); + } } public call(name: string, args: Record): ReturnType { @@ -145,6 +155,35 @@ describe('provar_testcase_step_edit description', () => { 'description must call out "inconsistent step types" (the third observable defect)' ); }); + + // ── PDX-484: title-level amendment-only contract ─────────────────────────── + // Many MCP clients (Claude Desktop tool-picker chips, Cursor audit pane, + // inline tool-call references in chat threads) render only the `title` + // field. Without the contract in the title an agent that reads only that + // surface gets zero PDX-479 protection. These assertions lock the title to + // the canonical phrasing chosen during the PDX-484 cross-client pilot. + + it('title carries the amendment-only contract (PDX-484)', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_step_edit'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + /amend/i.test(reg.title), + 'title must contain "Amend" or "amendment" so the contract is visible in tool-picker chips' + ); + assert.ok( + /exist/i.test(reg.title), + 'title must signal "existing test case only" so an agent reading only the chip does not call this for construction' + ); + }); + + it('title fits the cross-client chip-render comfort threshold (≤50 chars, PDX-484)', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_step_edit'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.title.length <= 50, + `title length ${reg.title.length} exceeds 50 chars — Cursor and other clients may truncate` + ); + }); }); // ── provar_testcase_step_edit ──────────────────────────────────────────────────