diff --git a/CLAUDE.md b/CLAUDE.md index 9251476c..0559d948 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -104,3 +104,16 @@ The project uses ESLint with `@typescript-eslint` strict rules. Common gotchas: - `camelcase` — `nitroX` is valid camelCase (capital X starts the next word) CI runs lint as part of `sf-prepack` — do not skip with `--no-verify` on the final merge commit. + +--- + +## Script naming convention + +Files anywhere under `scripts/` (including nested subdirectories) must be named for what they **do**, not for the ticket that prompted them. Ticket-prefixed names (e.g. `pdx-482-validate.cjs`) leak internal Jira plumbing into the file tree, age poorly once the ticket closes, and surface in customer-visible artifacts (CI logs, PR diffs, repo browsing). + +- **Allowed:** `authoring-flow-trace.cjs`, `construction-contract-validate.cjs`, `mcp-smoke.cjs`, `fetch-nitrox-packages.cjs` +- **Rejected:** `pdx-482-validate.cjs`, `PDX_481_trace.cjs`, `scripts/tmp/pdx-999.cjs`, anything whose basename matches `^pdx[-_]?\d+` (case-insensitive) + +Enforced by `scripts/lint-script-names.cjs`, which walks `scripts/` recursively and runs as a dependency of `yarn lint` (wireit `lint:script-names`). The check fails the lint step if any ticket-prefixed filename appears at any depth. + +Ticket IDs and rationale belong in commit messages and PR descriptions, not in filenames or in user-facing docs (`docs/mcp.md`, `docs/mcp-pilot-guide.md`). diff --git a/package.json b/package.json index 09157474..38248d87 100644 --- a/package.json +++ b/package.json @@ -180,6 +180,9 @@ }, "lint": { "command": "eslint src test --color --cache --cache-location .eslintcache", + "dependencies": [ + "lint:script-names" + ], "files": [ "src/**/*.ts", "test/**/*.ts", @@ -189,6 +192,14 @@ ], "output": [] }, + "lint:script-names": { + "command": "node scripts/lint-script-names.cjs", + "files": [ + "scripts/lint-script-names.cjs", + "scripts/**/*" + ], + "output": [] + }, "test:compile": { "command": "tsc -p \"./test\" --pretty", "files": [ diff --git a/scripts/pdx-481-trace.cjs b/scripts/authoring-flow-trace.cjs similarity index 96% rename from scripts/pdx-481-trace.cjs rename to scripts/authoring-flow-trace.cjs index 8e5296aa..06d65311 100644 --- a/scripts/pdx-481-trace.cjs +++ b/scripts/authoring-flow-trace.cjs @@ -1,8 +1,8 @@ -// PDX-481 prompt-flow trace. +// Authoring-flow trace. // -// Drives the patched MCP server over JSON-RPC stdio and captures the EXACT -// bytes that an MCP client (Claude Desktop / Cursor / etc.) would surface to -// its LLM at every decision point in the test-authoring flow: +// Drives the MCP server over JSON-RPC stdio and captures the EXACT bytes that +// an MCP client (Claude Desktop / Cursor / etc.) would surface to its LLM at +// every decision point in the test-authoring flow: // // 1. The orchestration prompt the LLM reads when planning ("I want to author a new test case") // 2. The tool-guide resource the LLM reads when picking the right tool @@ -11,7 +11,7 @@ // 5. The actual XML the tool emits when given a real multi-scenario payload // // Run from the worktree root after `yarn compile`: -// node scripts/pdx-481-trace.cjs +// node scripts/authoring-flow-trace.cjs 'use strict'; @@ -97,7 +97,7 @@ function extractSection(text, headerRegex, nextHeaderRegex) { await rpc('initialize', { protocolVersion: '2024-11-05', capabilities: {}, - clientInfo: { name: 'pdx-481-trace', version: '1.0.0' }, + clientInfo: { name: 'authoring-flow-trace', version: '1.0.0' }, }); // ── 1. The orchestration prompt's author-test flow ──────────────────────── diff --git a/scripts/pdx-481-validate.cjs b/scripts/authoring-guidance-validate.cjs similarity index 85% rename from scripts/pdx-481-validate.cjs rename to scripts/authoring-guidance-validate.cjs index 98aa6f61..bbe6aa44 100644 --- a/scripts/pdx-481-validate.cjs +++ b/scripts/authoring-guidance-validate.cjs @@ -1,9 +1,10 @@ -// PDX-481: server-side validation that the rewritten author-test guidance is -// reachable and contains the canonical single-call construction copy. Runs -// without requiring sf CLI to be linked to the local plugin. +// Authoring-guidance validation: confirm the author-test guidance (prompt + +// step-reference resource) is reachable and contains the canonical single-call +// construction copy. Runs without requiring sf CLI to be linked to the local +// plugin. // // yarn compile -// node scripts/pdx-481-validate.cjs +// node scripts/authoring-guidance-validate.cjs 'use strict'; @@ -66,11 +67,11 @@ function record(label, ok, detail) { await rpc('initialize', { protocolVersion: '2024-11-05', capabilities: {}, - clientInfo: { name: 'pdx-481-validate', version: '1.0.0' }, + clientInfo: { name: 'authoring-guidance-validate', version: '1.0.0' }, }); - // The orchestration prompt should still be registered (PDX-481 keeps it, - // unlike PDX-480 which disabled it). + // The orchestration prompt must remain registered; the author-test flow + // depends on it as the LLM's entry point. const orch = await rpc('prompts/get', { name: 'provar.guide.orchestration', arguments: { task: 'author-test' }, @@ -94,7 +95,7 @@ function record(label, ok, detail) { ); } - // PDX-479 anti-patterns + // Multi-call construction anti-patterns const mustExclude = ['repeat per step']; for (const phrase of mustExclude) { const present = text.includes(phrase); @@ -115,7 +116,7 @@ function record(label, ok, detail) { : `split confirmed` ); - // Tool-guide resource should still serve content (PDX-481 keeps it). + // Tool-guide resource must serve content; LLMs read it when picking a tool. const guide = await rpc('resources/read', { uri: 'provar://docs/tool-guide' }); const gcontent = guide.result?.contents?.[0]?.text ?? ''; record( @@ -146,7 +147,7 @@ function record(label, ok, detail) { fail++; } } - console.log(`\nPDX-481 validation: ${pass} passed, ${fail} failed`); + console.log(`\nAuthoring-guidance validation: ${pass} passed, ${fail} failed`); server.stdin.end(); process.exit(fail > 0 ? 1 : 0); diff --git a/scripts/pdx-482-validate.cjs b/scripts/construction-contract-validate.cjs similarity index 82% rename from scripts/pdx-482-validate.cjs rename to scripts/construction-contract-validate.cjs index 01537deb..ee2f89cd 100644 --- a/scripts/pdx-482-validate.cjs +++ b/scripts/construction-contract-validate.cjs @@ -1,27 +1,28 @@ -// PDX-482 / PDX-483 / PDX-484 validation: confirm the construct/amend contract -// is reachable at every MCP protocol surface the LLM sees, and that the runtime +// Construction-contract validation: confirm the construct/amend contract is +// reachable at every MCP protocol surface the LLM sees, and that the runtime // guard rejects the multi-call construction shape. // -// PDX-482 — description contract (standard + compact schema modes): assertions -// on tools/list description bodies. Compact mode coverage is critical because -// the adversarial review identified that PROVAR_MCP_SCHEMA_MODE=compact silently -// swapped the description for a contract-free one-liner. +// Description-contract pass (standard + compact schema modes): assertions on +// tools/list description bodies — every byte the LLM literally sees at the +// call site. Compact mode coverage is critical because +// PROVAR_MCP_SCHEMA_MODE=compact swaps the description for a short one-liner; +// if the contract isn't in that form, compact mode becomes a regression vector. // -// PDX-483 — runtime guard: drives a real tools/call with the rejected shape -// (steps:[]+dry_run:false+output_path) and asserts the response is a structured -// STEPS_REQUIRED error with a non-empty details.suggestion. This catches a -// regression class that the tools/list assertions cannot reach: the passive -// contract surviving in the description while the active guard silently +// Runtime-guard pass: drives a real tools/call with the rejected shape +// (steps:[] + dry_run:false + output_path) and asserts the response is a +// structured STEPS_REQUIRED error with a non-empty details.suggestion. This +// catches a regression that the description-pass assertions cannot reach: the +// passive contract surviving in the description while the active guard silently // regresses (e.g. a refactor reorders the handler so writes happen before the // check). // -// PDX-484 — title contract: assertions on the `title:` field that some clients +// Title-contract pass: assertions on the `title:` field that some clients // render exclusively (Claude Desktop chips, Cursor audit pane, inline tool-call // refs). Titles are schema-mode-independent but we assert in both passes to // surface drift early either way. // // yarn compile -// node scripts/pdx-482-validate.cjs +// node scripts/construction-contract-validate.cjs 'use strict'; @@ -96,7 +97,7 @@ function runValidation(mode, extraEnv, runAssertions) { await rpc('initialize', { protocolVersion: '2024-11-05', capabilities: {}, - clientInfo: { name: 'pdx-482-validate', version: '1.0.0' }, + clientInfo: { name: 'construction-contract-validate', version: '1.0.0' }, }); const tools = await rpc('tools/list', {}); const toolList = tools.result?.tools ?? []; @@ -110,9 +111,11 @@ function runValidation(mode, extraEnv, runAssertions) { }); } -// ── PDX-484: title-level construct-vs-amend contract ─────────────────────── -// Title field is independent of schema mode, but we assert it in both passes -// to catch drift early regardless of which mode a future refactor breaks. +// ── Title-level construct-vs-amend contract ──────────────────────────────── +// The `title:` field is independent of schema mode, but we assert it in both +// passes to catch drift early regardless of which mode a future refactor breaks. +// Many MCP clients render only the title field in tool-picker chips, so the +// contract must survive at that surface too. function titleAssertions(toolList, record) { const gen = toolList.find((t) => t.name === 'provar_testcase_generate'); if (!gen) { @@ -120,12 +123,12 @@ function titleAssertions(toolList, record) { } else { const t = gen.title ?? ''; record( - 'generate.title carries "one call" or "single call" (PDX-484)', + 'generate.title carries "one call" or "single call"', t.includes('one call') || t.includes('single call'), `title: ${JSON.stringify(t)}` ); - record('generate.title mentions steps (PDX-484)', /step/i.test(t), 'chip-level payload shape must be visible'); - record('generate.title length ≤ 50 chars (PDX-484)', t.length <= 50, `length: ${t.length}`); + record('generate.title mentions steps', /step/i.test(t), 'chip-level payload shape must be visible'); + record('generate.title length ≤ 50 chars', t.length <= 50, `length: ${t.length}`); } const edit = toolList.find((t) => t.name === 'provar_testcase_step_edit'); @@ -133,17 +136,13 @@ function titleAssertions(toolList, record) { record('provar_testcase_step_edit has a title', false, 'tool not found'); } else { const t = edit.title ?? ''; + record('step_edit.title contains "Amend" or "amendment"', /amend/i.test(t), `title: ${JSON.stringify(t)}`); record( - 'step_edit.title contains "Amend" or "amendment" (PDX-484)', - /amend/i.test(t), - `title: ${JSON.stringify(t)}` - ); - record( - 'step_edit.title signals "existing" test case only (PDX-484)', + 'step_edit.title signals "existing" test case only', /exist/i.test(t), 'chip-level signal that this tool does not construct new cases' ); - record('step_edit.title length ≤ 50 chars (PDX-484)', t.length <= 50, `length: ${t.length}`); + record('step_edit.title length ≤ 50 chars', t.length <= 50, `length: ${t.length}`); } } @@ -162,7 +161,7 @@ function standardAssertions(toolList, record) { record( 'generate.description contains "single call"', d.includes('single call'), - 'protects against PDX-479 regression at call site' + 'protects against the multi-call construction regression at call site' ); record( 'generate.description contains "FULL step tree"', @@ -176,8 +175,8 @@ function standardAssertions(toolList, record) { ); record( 'generate.description rejects CONSTRUCTING via step_edit', - // PDX-482 hardening: literal substring (not regex) — the previous regex - // would false-positive on hostile rewordings like "constructing...not via generate". + // Literal substring (not regex) — a regex match would false-positive on + // hostile rewordings like "constructing...not via generate". d.includes('not for CONSTRUCTING one from scratch'), 'literal canonical phrase: "not for CONSTRUCTING one from scratch"' ); @@ -240,13 +239,13 @@ function standardAssertions(toolList, record) { ); } - // PDX-484: title-level contract — runs in both modes to surface drift. + // Title-level contract — runs in both modes to surface drift. titleAssertions(toolList, record); } // ── Assertions for compact mode (short one-liner) ─────────────────────────── -// Adversarial review (Critical #1): the compact form must STILL carry the -// contract or PROVAR_MCP_SCHEMA_MODE=compact becomes a regression highway. +// The compact form must STILL carry the contract or PROVAR_MCP_SCHEMA_MODE=compact +// becomes a regression highway (the standard description is swapped out entirely). function compactAssertions(toolList, record) { const gen = toolList.find((t) => t.name === 'provar_testcase_generate'); if (!gen) { @@ -269,7 +268,7 @@ function compactAssertions(toolList, record) { 'must split AMENDING (step_edit) vs CONSTRUCTING (generate) in the compact form' ); record( - 'compact generate.description does NOT regress to the pre-PDX-482 contract-free form', + 'compact generate.description does NOT regress to a contract-free one-liner', !/^Generate a Provar XML test case skeleton with UUID guids and steps structure\.?$/.test(d), 'old compact form must be replaced' ); @@ -292,11 +291,11 @@ function compactAssertions(toolList, record) { ); } - // PDX-484: title-level contract — runs in both modes to surface drift. + // Title-level contract — runs in both modes to surface drift. titleAssertions(toolList, record); } -// ── PDX-483 runtime guard: tools/call assertion ───────────────────────────── +// ── Runtime guard: tools/call assertion ───────────────────────────────────── // Drives a real tools/call(provar_testcase_generate, ...) with the rejected // shape (steps:[] + dry_run:false + output_path) and asserts the response is // a structured STEPS_REQUIRED error. This is the only check that catches a @@ -357,11 +356,11 @@ function runRuntimeGuardValidation() { await rpc('initialize', { protocolVersion: '2024-11-05', capabilities: {}, - clientInfo: { name: 'pdx-483-validate', version: '1.0.0' }, + clientInfo: { name: 'construction-contract-validate-runtime', version: '1.0.0' }, }); // Use a unique tmp path so a leftover file from a prior run can't mask the assertion. - const outPath = path.join(TMP, `pdx483-validate-${Date.now()}.testcase`); + const outPath = path.join(TMP, `construction-contract-validate-${Date.now()}.testcase`); try { if (fs.existsSync(outPath)) fs.unlinkSync(outPath); } catch { @@ -371,7 +370,7 @@ function runRuntimeGuardValidation() { const callRes = await rpc('tools/call', { name: 'provar_testcase_generate', arguments: { - test_case_name: 'PDX-483 validate', + test_case_name: 'construction-contract validate', steps: [], dry_run: false, output_path: outPath, @@ -457,7 +456,7 @@ function runRuntimeGuardValidation() { fail++; } } - console.log(`\nPDX-482/PDX-483 validation: ${pass} passed, ${fail} failed`); + console.log(`\nConstruction-contract validation: ${pass} passed, ${fail} failed`); process.exit(fail > 0 ? 1 : 0); })().catch((err) => { console.error('Validation script error:', err); diff --git a/scripts/lint-script-names.cjs b/scripts/lint-script-names.cjs new file mode 100644 index 00000000..68a83442 --- /dev/null +++ b/scripts/lint-script-names.cjs @@ -0,0 +1,55 @@ +// Script-name lint: enforces the convention that files under scripts/ are +// named by what they DO, not by which ticket prompted them. +// +// Why: ticket-prefixed filenames anchor the codebase to internal Jira IDs, +// confuse future readers when the original ticket is closed/archived, and +// leak internal process language into customer-visible artifacts (CI logs, +// PR diffs, file trees that pilots may receive). Behaviour-named scripts +// stay readable as the codebase evolves. +// +// Rule: no file ANYWHERE under scripts/ (including nested subdirectories) +// may have a basename matching /^pdx[-_]?\d+/i. The walk is recursive so a +// nested `scripts/tmp/pdx-123.cjs` does not bypass the gate. +// +// Run: +// node scripts/lint-script-names.cjs +// Or via the lint chain: +// yarn lint # wireit runs lint:script-names as a dependency + +'use strict'; + +const fs = require('node:fs'); +const path = require('node:path'); + +const SCRIPTS_DIR = path.resolve(__dirname); +const TICKET_PREFIX_RE = /^pdx[-_]?\d+/i; + +function walk(dir) { + const out = []; + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const full = path.join(dir, entry.name); + if (entry.isDirectory()) { + out.push(...walk(full)); + } else if (entry.isFile()) { + out.push(full); + } + } + return out; +} + +const offenders = walk(SCRIPTS_DIR) + .filter((full) => TICKET_PREFIX_RE.test(path.basename(full))) + .map((full) => path.relative(path.dirname(SCRIPTS_DIR), full).replace(/\\/g, '/')) + .sort(); + +if (offenders.length === 0) { + console.log('lint-script-names: OK (no ticket-prefixed script filenames under scripts/)'); + process.exit(0); +} + +console.error('lint-script-names: FAIL — scripts/ contains ticket-prefixed filenames:'); +for (const rel of offenders) console.error(` - ${rel}`); +console.error( + '\nRename each file to describe what it DOES, not which ticket added it (e.g. `authoring-flow-trace.cjs` instead of `pdx-481-trace.cjs`).' +); +process.exit(1); diff --git a/scripts/mcp-smoke.cjs b/scripts/mcp-smoke.cjs index 50cf628d..6cea444b 100644 --- a/scripts/mcp-smoke.cjs +++ b/scripts/mcp-smoke.cjs @@ -177,17 +177,18 @@ async function runTests() { dry_run: true, }); - // ── 6b. provar_testcase_generate STEPS_REQUIRED runtime guard (PDX-483) ─── + // ── 6b. provar_testcase_generate STEPS_REQUIRED runtime guard ──────────── // Drives the rejected shape (steps:[] + dry_run:false + output_path) so the - // PDX-479 regression-class shape is exercised on every smoke run. The smoke + // multi-call construction shape is exercised on every smoke run. The smoke // framework counts any JSON-RPC response as PASS; the assertion that the - // body carries error_code='STEPS_REQUIRED' lives in scripts/pdx-482-validate.cjs. + // body carries error_code='STEPS_REQUIRED' lives in + // scripts/construction-contract-validate.cjs. if (inGroup('authoring')) await callTool('provar_testcase_generate', { - test_case_name: 'PDX-483 Guard Smoke', + test_case_name: 'STEPS_REQUIRED Guard Smoke', steps: [], dry_run: false, - output_path: path.join(TMP, 'pdx483-smoke-rejected.testcase'), + output_path: path.join(TMP, 'steps-required-smoke-rejected.testcase'), }); // ── 7. provar_testcase_validate ─────────────────────────────────────────── diff --git a/src/mcp/tools/testCaseGenerate.ts b/src/mcp/tools/testCaseGenerate.ts index 21ee6a70..6e97b736 100644 --- a/src/mcp/tools/testCaseGenerate.ts +++ b/src/mcp/tools/testCaseGenerate.ts @@ -262,15 +262,16 @@ export function registerTestCaseGenerate(server: McpServer, config: ServerConfig target_uri: input.target_uri, }); - // PDX-483: active runtime guard for the PDX-479 regression pattern. - // Rejects the exact shape that produces a contract-violating skeleton on - // disk: empty steps[] + non-dry-run + persistence target. Other empty- - // steps shapes (dry_run preview, no output_path) remain allowed. + // Runtime guard for the multi-call construction pattern: rejects the exact + // shape that produces a contract-violating skeleton on disk — empty steps[] + // + non-dry-run + persistence target. Other empty-steps shapes (dry-run + // preview, no output_path) remain allowed. if (input.steps.length === 0 && !input.dry_run && input.output_path) { const err = makeError( 'STEPS_REQUIRED', 'provar_testcase_generate was called with an empty steps[] array and a target output_path. ' + - 'This produces a contract-violating skeleton (the PDX-479 regression pattern) and is rejected.', + 'Constructing a test case requires the full step tree in a single call; ' + + 'an empty payload on the write path would produce a skeleton-only file.', requestId, false, {