From ec2230b641054f891025b748ade66e2e8ac3537b Mon Sep 17 00:00:00 2001 From: Michael Dailey Date: Fri, 15 May 2026 11:10:36 -0500 Subject: [PATCH] PDX-484: feat(mcp): carry construct-vs-amend contract into tool titles RCA: PDX-482 hardened the description bodies for provar_testcase_generate and provar_testcase_step_edit, but many MCP clients (Claude Desktop tool-picker chips, Cursor audit pane, inline tool-call references in chat threads) render only the title field. The previous bare titles ("Generate Test Case", "Edit Test Case Step") gave zero PDX-479 protection to agents reading only the chip-level surface. Fix: Updated the two tool titles to "Generate Test Case (full steps in one call)" (43 chars) and "Amend Existing Test Case Step" (29 chars). Both fit within the cross-client chip-render comfort threshold (<= 50 chars). Extended MockMcpServer in the two test files to capture title alongside description; added unit assertions for the canonical phrasing and length. Extended scripts/pdx-482-validate.cjs with a titleAssertions helper run in both standard and compact schema modes (titles are mode-independent, but asserting them in both modes surfaces drift early). Updated docs/mcp.md tool sections and docs/mcp-pilot-guide.md Scenario 12 to mention the title-level contract. 
--- docs/mcp-pilot-guide.md | 2 + docs/mcp.md | 4 ++ scripts/pdx-482-validate.cjs | 53 +++++++++++++++++++++++-- src/mcp/tools/testCaseGenerate.ts | 8 +++- src/mcp/tools/testCaseStepTools.ts | 6 ++- test/unit/mcp/testCaseGenerate.test.ts | 45 +++++++++++++++++++-- test/unit/mcp/testCaseStepTools.test.ts | 45 +++++++++++++++++++-- 7 files changed, 152 insertions(+), 11 deletions(-) diff --git a/docs/mcp-pilot-guide.md b/docs/mcp-pilot-guide.md index 9fbaa55b..ce8f29b1 100644 --- a/docs/mcp-pilot-guide.md +++ b/docs/mcp-pilot-guide.md @@ -445,6 +445,8 @@ NitroX is Provar's Hybrid Model for locators — it maps Salesforce component-ba **Background:** A regression in 1.5.0 (PDX-479) traced to authoring guidance that steered LLMs toward a per-step construction pattern. Multi-call construction drops scenario numbers (e.g. Scenario 1 → Scenario 3, no Scenario 2), flattens asserts that should be nested inside `UiWithScreen` clauses, and produces inconsistent assert API IDs across the case. This scenario exists so the regression class is exercised in pilot evaluation and cannot recur silently. +**Title-level contract:** the chip-level `title` fields for the two tools — `Generate Test Case (full steps in one call)` and `Amend Existing Test Case Step` — carry the construct-vs-amend split at the tool-picker surface. MCP clients that render only the title (Claude Desktop tool-picker chips, Cursor audit pane, inline tool-call references in chat threads) still expose the contract to the agent before any description is read. + **Prompt:** > "Create a Provar test case `AccountFlow.testcase` that covers three scenarios: diff --git a/docs/mcp.md b/docs/mcp.md index 7a08fed2..edbe5b84 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -703,6 +703,8 @@ Validates a Java Page Object source file against 30+ quality rules (structural c Generates an XML test case skeleton with UUID v4 guids and sequential `testItemId` values. 
+The tool's chip-level `title` — `Generate Test Case (full steps in one call)` — carries the construction contract so that MCP clients which render only the title (Claude Desktop tool-picker chips, Cursor audit pane, inline tool-call references) surface the single-call requirement to the agent before any description is read. + > **Construction pattern (read first).** Pass the FULL step tree for the test case in a single call via the `steps[]` array. Do **not** call this tool with `steps: []` and then append steps via repeated `provar_testcase_step_edit` calls — that pattern drops scenarios, flattens nesting, and produces inconsistent step types. `provar_testcase_step_edit` is for **amending** an already-validated test case (single-step add, attribute fix, debug edit), not for **constructing** one from scratch. **Generated `` element structure (Provar requirements):** @@ -1547,6 +1549,8 @@ Salesforce DML error categories (`SALESFORCE_*`) represent test-data failures Atomically add or remove a single step (``) in a Provar XML test case file. Writes a `.bak` backup before mutating, runs structural validation after the edit, and automatically restores the backup if validation fails. +The tool's chip-level `title` — `Amend Existing Test Case Step` — signals the amendment-only contract in MCP clients that render only the title (Claude Desktop tool-picker chips, Cursor audit pane, inline tool-call references). An agent that reads only the title still sees that this tool operates on an existing test case, not a new one. + > **When to use.** This tool is for **amending** an existing, already-validated test case (single-step add, attribute fix, debug edit). It is **not** for constructing a test case from scratch by calling it repeatedly after a `steps: []` `provar_testcase_generate`. Building a case step-by-step via repeated `step_edit` calls produces structurally invalid test cases (dropped scenarios, flat asserts, inconsistent step types). 
For new test cases, pass the full step tree to `provar_testcase_generate` in a single call. Prerequisites: the test case file must exist and be valid XML with a `` structure. diff --git a/scripts/pdx-482-validate.cjs b/scripts/pdx-482-validate.cjs index 5b4e5473..1d7b1e60 100644 --- a/scripts/pdx-482-validate.cjs +++ b/scripts/pdx-482-validate.cjs @@ -1,10 +1,14 @@ -// PDX-482 validation: confirm the construct/amend contract is reachable at the -// MCP protocol surface in BOTH standard and compact schema modes. +// PDX-482 / PDX-484 validation: confirm the construct/amend contract is reachable +// at the MCP protocol surface in BOTH standard and compact schema modes, AND in +// the `title:` field that some clients render exclusively (Claude Desktop chips, +// Cursor audit pane, inline tool-call refs). // // The LLM reads tools/list before every tool call, so every assertion here is // on bytes the LLM literally sees at the call site. Compact mode coverage is // critical because the adversarial review identified that PROVAR_MCP_SCHEMA_MODE=compact -// silently swapped the description for a contract-free one-liner. +// silently swapped the description for a contract-free one-liner. Title-level +// coverage was added by PDX-484: the title field is independent of schema mode, +// but we assert it in both passes to surface drift early either way. // // yarn compile // node scripts/pdx-482-validate.cjs @@ -95,6 +99,43 @@ function runValidation(mode, extraEnv, runAssertions) { }); } +// ── PDX-484: title-level construct-vs-amend contract ─────────────────────── +// Title field is independent of schema mode, but we assert it in both passes +// to catch drift early regardless of which mode a future refactor breaks. +function titleAssertions(toolList, record) { + const gen = toolList.find((t) => t.name === 'provar_testcase_generate'); + if (!gen) { + record('provar_testcase_generate has a title', false, 'tool not found'); + } else { + const t = gen.title ?? 
''; + record( + 'generate.title carries "one call" or "single call" (PDX-484)', + t.includes('one call') || t.includes('single call'), + `title: ${JSON.stringify(t)}` + ); + record('generate.title mentions steps (PDX-484)', /step/i.test(t), 'chip-level payload shape must be visible'); + record('generate.title length ≤ 50 chars (PDX-484)', t.length <= 50, `length: ${t.length}`); + } + + const edit = toolList.find((t) => t.name === 'provar_testcase_step_edit'); + if (!edit) { + record('provar_testcase_step_edit has a title', false, 'tool not found'); + } else { + const t = edit.title ?? ''; + record( + 'step_edit.title contains "Amend" or "amendment" (PDX-484)', + /amend/i.test(t), + `title: ${JSON.stringify(t)}` + ); + record( + 'step_edit.title signals "existing" test case only (PDX-484)', + /exist/i.test(t), + 'chip-level signal that this tool does not construct new cases' + ); + record('step_edit.title length ≤ 50 chars (PDX-484)', t.length <= 50, `length: ${t.length}`); + } +} + // ── Assertions for standard mode (full TOOL_DESCRIPTION) ──────────────────── function standardAssertions(toolList, record) { const gen = toolList.find((t) => t.name === 'provar_testcase_generate'); @@ -187,6 +228,9 @@ function standardAssertions(toolList, record) { 'consequence is explicit so the contract is judgement-friendly' ); } + + // PDX-484: title-level contract — runs in both modes to surface drift. + titleAssertions(toolList, record); } // ── Assertions for compact mode (short one-liner) ─────────────────────────── @@ -236,6 +280,9 @@ function compactAssertions(toolList, record) { 'rejection must survive compact mode' ); } + + // PDX-484: title-level contract — runs in both modes to surface drift. 
+ titleAssertions(toolList, record); } (async () => { diff --git a/src/mcp/tools/testCaseGenerate.ts b/src/mcp/tools/testCaseGenerate.ts index 4d7d64dc..28e9601c 100644 --- a/src/mcp/tools/testCaseGenerate.ts +++ b/src/mcp/tools/testCaseGenerate.ts @@ -167,7 +167,13 @@ export function registerTestCaseGenerate(server: McpServer, config: ServerConfig server.registerTool( 'provar_testcase_generate', { - title: 'Generate Test Case', + // PDX-484: carry the construct-vs-amend contract into the `title:` field + // because many MCP clients (Claude Desktop tool-picker chips, Cursor audit + // pane, inline tool-call references in chat threads) render only the title. + // Without the "(full steps in one call)" suffix an agent that reads only + // the title surface gets zero PDX-479 protection. Length: 43 chars — + // well under the ~50 char comfort threshold for the clients we test. + title: 'Generate Test Case (full steps in one call)', description: desc( TOOL_DESCRIPTION, // PDX-482: the compact form must also carry the construction contract, diff --git a/src/mcp/tools/testCaseStepTools.ts b/src/mcp/tools/testCaseStepTools.ts index c4e5ecc1..bafb4e7d 100644 --- a/src/mcp/tools/testCaseStepTools.ts +++ b/src/mcp/tools/testCaseStepTools.ts @@ -86,7 +86,11 @@ export function registerTestCaseStepEdit(server: McpServer, config: ServerConfig server.registerTool( 'provar_testcase_step_edit', { - title: 'Edit Test Case Step', + // PDX-484: carry the AMENDMENT-ONLY contract into the `title:` field. + // "Amend" mirrors the AMENDMENT-ONLY framing in the description body + // and "Existing" signals that the tool does not construct new test cases. + // Length: 29 chars — well within the chip-render comfort threshold. 
+ title: 'Amend Existing Test Case Step', description: desc( [ // ── Usage contract (READ FIRST — PDX-482) ───────────────────────────── diff --git a/test/unit/mcp/testCaseGenerate.test.ts b/test/unit/mcp/testCaseGenerate.test.ts index b2049463..e9d6e445 100644 --- a/test/unit/mcp/testCaseGenerate.test.ts +++ b/test/unit/mcp/testCaseGenerate.test.ts @@ -21,7 +21,9 @@ import type { ServerConfig } from '../../../src/mcp/server.js'; type ToolHandler = (args: Record) => unknown; class MockMcpServer { - public registrations: Array<{ name: string; description: string }> = []; + // PDX-484: capture `title` alongside `description` so tests can assert on the + // title-level contract. Many MCP clients render only the title field. + public registrations: Array<{ name: string; description: string; title: string }> = []; private handlers = new Map(); public tool(name: string, _description: string, _schema: unknown, handler: ToolHandler): void { @@ -30,8 +32,16 @@ class MockMcpServer { public registerTool(name: string, config: unknown, handler: ToolHandler): void { this.handlers.set(name, handler); - const desc = (config as Record)['description']; - if (typeof desc === 'string') this.registrations.push({ name, description: desc }); + const cfg = config as Record; + const desc = cfg['description']; + const title = cfg['title']; + if (typeof desc === 'string') { + this.registrations.push({ + name, + description: desc, + title: typeof title === 'string' ? title : '', + }); + } } public call(name: string, args: Record): ReturnType { @@ -151,6 +161,35 @@ describe('provar_testcase_generate description', () => { ); }); + // ── PDX-484: title-level construct-vs-amend contract ────────────────────── + // Many MCP clients (Claude Desktop tool-picker chips, Cursor audit pane, + // inline tool-call references in chat threads) render only the `title` + // field. Without the contract in the title an agent that reads only that + // surface gets zero PDX-479 protection. 
These assertions lock the title to + // the canonical phrasing chosen during the PDX-484 cross-client pilot. + + it('title carries the single-call construction contract (PDX-484)', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.title.includes('one call') || reg.title.includes('single call'), + 'title must contain "one call" or "single call" so the contract is visible in tool-picker chips' + ); + assert.ok( + /step/i.test(reg.title), + 'title must mention steps so the LLM sees the payload shape at the chip-level surface' + ); + }); + + it('title fits the cross-client chip-render comfort threshold (≤50 chars, PDX-484)', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.title.length <= 50, + `title length ${reg.title.length} exceeds 50 chars — Cursor and other clients may truncate` + ); + }); + // ── PDX-482 hardening: compact-mode coverage (adversarial review fix) ────── // PROVAR_MCP_SCHEMA_MODE=compact swaps the entire description for a short // one-liner. Without this guard, compact mode is a regression highway: diff --git a/test/unit/mcp/testCaseStepTools.test.ts b/test/unit/mcp/testCaseStepTools.test.ts index 17ca038b..c353d264 100644 --- a/test/unit/mcp/testCaseStepTools.test.ts +++ b/test/unit/mcp/testCaseStepTools.test.ts @@ -18,7 +18,9 @@ import { registerAllTestCaseStepTools } from '../../../src/mcp/tools/testCaseSte type ToolHandler = (args: Record) => unknown; class MockMcpServer { - public registrations: Array<{ name: string; description: string }> = []; + // PDX-484: capture `title` alongside `description` so tests can assert on the + // title-level contract. Many MCP clients render only the title field. 
+ public registrations: Array<{ name: string; description: string; title: string }> = []; private handlers = new Map(); public tool(name: string, _desc: string, _schema: unknown, handler: ToolHandler): void { @@ -27,8 +29,16 @@ class MockMcpServer { public registerTool(name: string, config: unknown, handler: ToolHandler): void { this.handlers.set(name, handler); - const desc = (config as Record)['description']; - if (typeof desc === 'string') this.registrations.push({ name, description: desc }); + const cfg = config as Record; + const desc = cfg['description']; + const title = cfg['title']; + if (typeof desc === 'string') { + this.registrations.push({ + name, + description: desc, + title: typeof title === 'string' ? title : '', + }); + } } public call(name: string, args: Record): ReturnType { @@ -145,6 +155,35 @@ describe('provar_testcase_step_edit description', () => { 'description must call out "inconsistent step types" (the third observable defect)' ); }); + + // ── PDX-484: title-level amendment-only contract ─────────────────────────── + // Many MCP clients (Claude Desktop tool-picker chips, Cursor audit pane, + // inline tool-call references in chat threads) render only the `title` + // field. Without the contract in the title an agent that reads only that + // surface gets zero PDX-479 protection. These assertions lock the title to + // the canonical phrasing chosen during the PDX-484 cross-client pilot. 
+ + it('title carries the amendment-only contract (PDX-484)', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_step_edit'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + /amend/i.test(reg.title), + 'title must contain "Amend" or "amendment" so the contract is visible in tool-picker chips' + ); + assert.ok( + /exist/i.test(reg.title), + 'title must signal "existing test case only" so an agent reading only the chip does not call this for construction' + ); + }); + + it('title fits the cross-client chip-render comfort threshold (≤50 chars, PDX-484)', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_step_edit'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.title.length <= 50, + `title length ${reg.title.length} exceeds 50 chars — Cursor and other clients may truncate` + ); + }); }); // ── provar_testcase_step_edit ──────────────────────────────────────────────────