Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,16 @@ The project uses ESLint with `@typescript-eslint` strict rules. Common gotchas:
- `camelcase` — `nitroX` is valid camelCase (capital X starts the next word)

CI runs lint as part of `sf-prepack` — do not skip with `--no-verify` on the final merge commit.

---

## Script naming convention

Files anywhere under `scripts/` (including nested subdirectories) must be named for what they **do**, not for the ticket that prompted them. Ticket-prefixed names (e.g. `pdx-482-validate.cjs`) leak internal Jira plumbing into the file tree, age poorly once the ticket closes, and surface in customer-visible artifacts (CI logs, PR diffs, repo browsing).

- **Allowed:** `authoring-flow-trace.cjs`, `construction-contract-validate.cjs`, `mcp-smoke.cjs`, `fetch-nitrox-packages.cjs`
- **Rejected:** `pdx-482-validate.cjs`, `PDX_481_trace.cjs`, `scripts/tmp/pdx-999.cjs`, anything whose basename matches `^pdx[-_]?\d+` (case-insensitive)

Enforced by `scripts/lint-script-names.cjs`, which walks `scripts/` recursively and runs as a dependency of `yarn lint` (wireit `lint:script-names`). The check fails the lint step if any ticket-prefixed filename appears at any depth.

Ticket IDs and rationale belong in commit messages and PR descriptions, not in filenames or in user-facing docs (`docs/mcp.md`, `docs/mcp-pilot-guide.md`).
11 changes: 11 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@
},
"lint": {
"command": "eslint src test --color --cache --cache-location .eslintcache",
"dependencies": [
"lint:script-names"
],
"files": [
"src/**/*.ts",
"test/**/*.ts",
Expand All @@ -189,6 +192,14 @@
],
"output": []
},
"lint:script-names": {
"command": "node scripts/lint-script-names.cjs",
"files": [
"scripts/lint-script-names.cjs",
"scripts/**/*"
],
"output": []
},
"test:compile": {
"command": "tsc -p \"./test\" --pretty",
"files": [
Expand Down
12 changes: 6 additions & 6 deletions scripts/pdx-481-trace.cjs → scripts/authoring-flow-trace.cjs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// PDX-481 prompt-flow trace.
// Authoring-flow trace.
//
// Drives the patched MCP server over JSON-RPC stdio and captures the EXACT
// bytes that an MCP client (Claude Desktop / Cursor / etc.) would surface to
// its LLM at every decision point in the test-authoring flow:
// Drives the MCP server over JSON-RPC stdio and captures the EXACT bytes that
// an MCP client (Claude Desktop / Cursor / etc.) would surface to its LLM at
// every decision point in the test-authoring flow:
//
// 1. The orchestration prompt the LLM reads when planning ("I want to author a new test case")
// 2. The tool-guide resource the LLM reads when picking the right tool
Expand All @@ -11,7 +11,7 @@
// 5. The actual XML the tool emits when given a real multi-scenario payload
//
// Run from the worktree root after `yarn compile`:
// node scripts/pdx-481-trace.cjs
// node scripts/authoring-flow-trace.cjs

'use strict';

Expand Down Expand Up @@ -97,7 +97,7 @@ function extractSection(text, headerRegex, nextHeaderRegex) {
await rpc('initialize', {
protocolVersion: '2024-11-05',
capabilities: {},
clientInfo: { name: 'pdx-481-trace', version: '1.0.0' },
clientInfo: { name: 'authoring-flow-trace', version: '1.0.0' },
});

// ── 1. The orchestration prompt's author-test flow ────────────────────────
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
// PDX-481: server-side validation that the rewritten author-test guidance is
// reachable and contains the canonical single-call construction copy. Runs
// without requiring sf CLI to be linked to the local plugin.
// Authoring-guidance validation: confirm the author-test guidance (prompt +
// step-reference resource) is reachable and contains the canonical single-call
// construction copy. Runs without requiring sf CLI to be linked to the local
// plugin.
//
// yarn compile
// node scripts/pdx-481-validate.cjs
// node scripts/authoring-guidance-validate.cjs

'use strict';

Expand Down Expand Up @@ -66,11 +67,11 @@ function record(label, ok, detail) {
await rpc('initialize', {
protocolVersion: '2024-11-05',
capabilities: {},
clientInfo: { name: 'pdx-481-validate', version: '1.0.0' },
clientInfo: { name: 'authoring-guidance-validate', version: '1.0.0' },
});

// The orchestration prompt should still be registered (PDX-481 keeps it,
// unlike PDX-480 which disabled it).
// The orchestration prompt must remain registered; the author-test flow
// depends on it as the LLM's entry point.
const orch = await rpc('prompts/get', {
name: 'provar.guide.orchestration',
arguments: { task: 'author-test' },
Expand All @@ -94,7 +95,7 @@ function record(label, ok, detail) {
);
}

// PDX-479 anti-patterns
// Multi-call construction anti-patterns
const mustExclude = ['repeat per step'];
for (const phrase of mustExclude) {
const present = text.includes(phrase);
Expand All @@ -115,7 +116,7 @@ function record(label, ok, detail) {
: `split confirmed`
);

// Tool-guide resource should still serve content (PDX-481 keeps it).
// Tool-guide resource must serve content; LLMs read it when picking a tool.
const guide = await rpc('resources/read', { uri: 'provar://docs/tool-guide' });
const gcontent = guide.result?.contents?.[0]?.text ?? '';
record(
Expand Down Expand Up @@ -146,7 +147,7 @@ function record(label, ok, detail) {
fail++;
}
}
console.log(`\nPDX-481 validation: ${pass} passed, ${fail} failed`);
console.log(`\nAuthoring-guidance validation: ${pass} passed, ${fail} failed`);

server.stdin.end();
process.exit(fail > 0 ? 1 : 0);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,27 +1,28 @@
// PDX-482 / PDX-483 / PDX-484 validation: confirm the construct/amend contract
// is reachable at every MCP protocol surface the LLM sees, and that the runtime
// Construction-contract validation: confirm the construct/amend contract is
// reachable at every MCP protocol surface the LLM sees, and that the runtime
// guard rejects the multi-call construction shape.
//
// PDX-482 — description contract (standard + compact schema modes): assertions
// on tools/list description bodies. Compact mode coverage is critical because
// the adversarial review identified that PROVAR_MCP_SCHEMA_MODE=compact silently
// swapped the description for a contract-free one-liner.
// Description-contract pass (standard + compact schema modes): assertions on
// tools/list description bodies — every byte the LLM literally sees at the
// call site. Compact mode coverage is critical because
// PROVAR_MCP_SCHEMA_MODE=compact swaps the description for a short one-liner;
// if the contract isn't in that form, compact mode becomes a regression vector.
//
// PDX-483 — runtime guard: drives a real tools/call with the rejected shape
// (steps:[]+dry_run:false+output_path) and asserts the response is a structured
// STEPS_REQUIRED error with a non-empty details.suggestion. This catches a
// regression class that the tools/list assertions cannot reach: the passive
// contract surviving in the description while the active guard silently
// Runtime-guard pass: drives a real tools/call with the rejected shape
// (steps:[] + dry_run:false + output_path) and asserts the response is a
// structured STEPS_REQUIRED error with a non-empty details.suggestion. This
// catches a regression that the description-pass assertions cannot reach: the
// passive contract surviving in the description while the active guard silently
// regresses (e.g. a refactor reorders the handler so writes happen before the
// check).
//
// PDX-484 — title contract: assertions on the `title:` field that some clients
// Title-contract pass: assertions on the `title:` field that some clients
// render exclusively (Claude Desktop chips, Cursor audit pane, inline tool-call
// refs). Titles are schema-mode-independent but we assert in both passes to
// surface drift early either way.
//
// yarn compile
// node scripts/pdx-482-validate.cjs
// node scripts/construction-contract-validate.cjs

'use strict';

Expand Down Expand Up @@ -96,7 +97,7 @@ function runValidation(mode, extraEnv, runAssertions) {
await rpc('initialize', {
protocolVersion: '2024-11-05',
capabilities: {},
clientInfo: { name: 'pdx-482-validate', version: '1.0.0' },
clientInfo: { name: 'construction-contract-validate', version: '1.0.0' },
});
const tools = await rpc('tools/list', {});
const toolList = tools.result?.tools ?? [];
Expand All @@ -110,40 +111,38 @@ function runValidation(mode, extraEnv, runAssertions) {
});
}

// ── PDX-484: title-level construct-vs-amend contract ───────────────────────
// Title field is independent of schema mode, but we assert it in both passes
// to catch drift early regardless of which mode a future refactor breaks.
// ── Title-level construct-vs-amend contract ────────────────────────────────
// The `title:` field is independent of schema mode, but we assert it in both
// passes to catch drift early regardless of which mode a future refactor breaks.
// Many MCP clients render only the title field in tool-picker chips, so the
// contract must survive at that surface too.
function titleAssertions(toolList, record) {
const gen = toolList.find((t) => t.name === 'provar_testcase_generate');
if (!gen) {
record('provar_testcase_generate has a title', false, 'tool not found');
} else {
const t = gen.title ?? '';
record(
'generate.title carries "one call" or "single call" (PDX-484)',
'generate.title carries "one call" or "single call"',
t.includes('one call') || t.includes('single call'),
`title: ${JSON.stringify(t)}`
);
record('generate.title mentions steps (PDX-484)', /step/i.test(t), 'chip-level payload shape must be visible');
record('generate.title length ≤ 50 chars (PDX-484)', t.length <= 50, `length: ${t.length}`);
record('generate.title mentions steps', /step/i.test(t), 'chip-level payload shape must be visible');
record('generate.title length ≤ 50 chars', t.length <= 50, `length: ${t.length}`);
}

const edit = toolList.find((t) => t.name === 'provar_testcase_step_edit');
if (!edit) {
record('provar_testcase_step_edit has a title', false, 'tool not found');
} else {
const t = edit.title ?? '';
record('step_edit.title contains "Amend" or "amendment"', /amend/i.test(t), `title: ${JSON.stringify(t)}`);
record(
'step_edit.title contains "Amend" or "amendment" (PDX-484)',
/amend/i.test(t),
`title: ${JSON.stringify(t)}`
);
record(
'step_edit.title signals "existing" test case only (PDX-484)',
'step_edit.title signals "existing" test case only',
/exist/i.test(t),
'chip-level signal that this tool does not construct new cases'
);
record('step_edit.title length ≤ 50 chars (PDX-484)', t.length <= 50, `length: ${t.length}`);
record('step_edit.title length ≤ 50 chars', t.length <= 50, `length: ${t.length}`);
}
}

Expand All @@ -162,7 +161,7 @@ function standardAssertions(toolList, record) {
record(
'generate.description contains "single call"',
d.includes('single call'),
'protects against PDX-479 regression at call site'
'protects against the multi-call construction regression at call site'
);
record(
'generate.description contains "FULL step tree"',
Expand All @@ -176,8 +175,8 @@ function standardAssertions(toolList, record) {
);
record(
'generate.description rejects CONSTRUCTING via step_edit',
// PDX-482 hardening: literal substring (not regex) — the previous regex
// would false-positive on hostile rewordings like "constructing...not via generate".
// Literal substring (not regex) — a regex match would false-positive on
// hostile rewordings like "constructing...not via generate".
d.includes('not for CONSTRUCTING one from scratch'),
'literal canonical phrase: "not for CONSTRUCTING one from scratch"'
);
Expand Down Expand Up @@ -240,13 +239,13 @@ function standardAssertions(toolList, record) {
);
}

// PDX-484: title-level contract — runs in both modes to surface drift.
// Title-level contract — runs in both modes to surface drift.
titleAssertions(toolList, record);
}

// ── Assertions for compact mode (short one-liner) ───────────────────────────
// Adversarial review (Critical #1): the compact form must STILL carry the
// contract or PROVAR_MCP_SCHEMA_MODE=compact becomes a regression highway.
// The compact form must STILL carry the contract or PROVAR_MCP_SCHEMA_MODE=compact
// becomes a regression highway (the standard description is swapped out entirely).
function compactAssertions(toolList, record) {
const gen = toolList.find((t) => t.name === 'provar_testcase_generate');
if (!gen) {
Expand All @@ -269,7 +268,7 @@ function compactAssertions(toolList, record) {
'must split AMENDING (step_edit) vs CONSTRUCTING (generate) in the compact form'
);
record(
'compact generate.description does NOT regress to the pre-PDX-482 contract-free form',
'compact generate.description does NOT regress to a contract-free one-liner',
!/^Generate a Provar XML test case skeleton with UUID guids and steps structure\.?$/.test(d),
'old compact form must be replaced'
);
Expand All @@ -292,11 +291,11 @@ function compactAssertions(toolList, record) {
);
}

// PDX-484: title-level contract — runs in both modes to surface drift.
// Title-level contract — runs in both modes to surface drift.
titleAssertions(toolList, record);
}

// ── PDX-483 runtime guard: tools/call assertion ─────────────────────────────
// ── Runtime guard: tools/call assertion ─────────────────────────────────────
// Drives a real tools/call(provar_testcase_generate, ...) with the rejected
// shape (steps:[] + dry_run:false + output_path) and asserts the response is
// a structured STEPS_REQUIRED error. This is the only check that catches a
Expand Down Expand Up @@ -357,11 +356,11 @@ function runRuntimeGuardValidation() {
await rpc('initialize', {
protocolVersion: '2024-11-05',
capabilities: {},
clientInfo: { name: 'pdx-483-validate', version: '1.0.0' },
clientInfo: { name: 'construction-contract-validate-runtime', version: '1.0.0' },
});

// Use a unique tmp path so a leftover file from a prior run can't mask the assertion.
const outPath = path.join(TMP, `pdx483-validate-${Date.now()}.testcase`);
const outPath = path.join(TMP, `construction-contract-validate-${Date.now()}.testcase`);
try {
if (fs.existsSync(outPath)) fs.unlinkSync(outPath);
} catch {
Expand All @@ -371,7 +370,7 @@ function runRuntimeGuardValidation() {
const callRes = await rpc('tools/call', {
name: 'provar_testcase_generate',
arguments: {
test_case_name: 'PDX-483 validate',
test_case_name: 'construction-contract validate',
steps: [],
dry_run: false,
output_path: outPath,
Expand Down Expand Up @@ -457,7 +456,7 @@ function runRuntimeGuardValidation() {
fail++;
}
}
console.log(`\nPDX-482/PDX-483 validation: ${pass} passed, ${fail} failed`);
console.log(`\nConstruction-contract validation: ${pass} passed, ${fail} failed`);
process.exit(fail > 0 ? 1 : 0);
})().catch((err) => {
console.error('Validation script error:', err);
Expand Down
55 changes: 55 additions & 0 deletions scripts/lint-script-names.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Script-name lint: enforces the convention that files under scripts/ are
// named by what they DO, not by which ticket prompted them.
//
// Why: ticket-prefixed filenames anchor the codebase to internal Jira IDs,
// confuse future readers when the original ticket is closed/archived, and
// leak internal process language into customer-visible artifacts (CI logs,
// PR diffs, file trees that pilots may receive). Behaviour-named scripts
// stay readable as the codebase evolves.
//
// Rule: no file ANYWHERE under scripts/ (including nested subdirectories)
// may have a basename matching /^pdx[-_]?\d+/i. The walk is recursive so a
// nested `scripts/tmp/pdx-123.cjs` does not bypass the gate.
//
// Run:
// node scripts/lint-script-names.cjs
// Or via the lint chain:
// yarn lint # wireit runs lint:script-names as a dependency

'use strict';

const fs = require('node:fs');
const path = require('node:path');

const SCRIPTS_DIR = path.resolve(__dirname);
const TICKET_PREFIX_RE = /^pdx[-_]?\d+/i;

function walk(dir) {
const out = [];
for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
const full = path.join(dir, entry.name);
if (entry.isDirectory()) {
out.push(...walk(full));
} else if (entry.isFile()) {
out.push(full);
}
}
return out;
}

const offenders = walk(SCRIPTS_DIR)
.filter((full) => TICKET_PREFIX_RE.test(path.basename(full)))
.map((full) => path.relative(path.dirname(SCRIPTS_DIR), full).replace(/\\/g, '/'))
.sort();

if (offenders.length === 0) {
console.log('lint-script-names: OK (no ticket-prefixed script filenames under scripts/)');
process.exit(0);
}

console.error('lint-script-names: FAIL — scripts/ contains ticket-prefixed filenames:');
for (const rel of offenders) console.error(` - ${rel}`);
console.error(
'\nRename each file to describe what it DOES, not which ticket added it (e.g. `authoring-flow-trace.cjs` instead of `pdx-481-trace.cjs`).'
);
process.exit(1);
Loading
Loading