ProvarTesting · mrdailey99 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/docs/mcp-pilot-guide.md b/docs/mcp-pilot-guide.md
@@ -445,6 +445,14 @@ NitroX is Provar's Hybrid Model for locators — it maps Salesforce component-ba
 
 **Background:** A regression in 1.5.0 (PDX-479) traced to authoring guidance that steered LLMs toward a per-step construction pattern. Multi-call construction drops scenario numbers (e.g. Scenario 1 → Scenario 3, no Scenario 2), flattens asserts that should be nested inside `UiWithScreen` clauses, and produces inconsistent assert API IDs across the case. This scenario exists so the regression class is exercised in pilot evaluation and cannot recur silently.
 
+**Defense in depth.** Three layers protect against the multi-call construction pattern:
+
+1. **Prompt and resource guidance** — authoring prompts and the MCP step-reference resource describe single-call construction as the contract.
+2. **Tool-description contract** — `provar_testcase_generate` and `provar_testcase_step_edit` descriptions explicitly mark generate as constructor-only and step_edit as amendment-only, so the LLM reads the contract at every call site (including compact schema mode).
+3. **Runtime guard** — `provar_testcase_generate` rejects the exact shape that would produce a skeleton-only file: `steps:[]` + `dry_run:false` + `output_path`. The rejection returns `STEPS_REQUIRED` with `details.suggestion` telling the LLM to pass the full step tree in one call. Empty-steps shapes that don't write a file (dry-run preview, no `output_path`) remain allowed.
+
+If a pilot LLM falls into the multi-call pattern despite the description contract, the runtime guard converts the failure into an actionable error rather than a silently broken file on disk.
+
 **Prompt:**
 
 > "Create a Provar test case `AccountFlow.testcase` that covers three scenarios:

diff --git a/docs/mcp.md b/docs/mcp.md
@@ -765,10 +765,22 @@ AssertValues uses **flat** argument structure (`expectedValue`, `actualValue`, `
 
 **Error codes**
 
-| Code               | Meaning                                                               |
-| ------------------ | --------------------------------------------------------------------- |
-| `TESTCASE_INVALID` | Generated XML failed structural validation (see `details.validation`) |
-| `FILE_EXISTS`      | `output_path` already exists and `overwrite=false`                    |
+| Code               | Meaning                                                                                                                                                                              |
+| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `TESTCASE_INVALID` | Generated XML failed structural validation (see `details.validation`)                                                                                                                |
+| `FILE_EXISTS`      | `output_path` already exists and `overwrite=false`                                                                                                                                   |
+| `STEPS_REQUIRED`   | Called with `steps:[]` + `dry_run:false` + `output_path` — constructing a test case requires the full step tree on the write path. `details.suggestion` tells the caller how to fix. |
+
+**`STEPS_REQUIRED`.** The rejected shape is `steps:[]` + `dry_run:false` + `output_path`. Constructing a test case requires the full step tree in a single call; passing an empty array on the write path would produce a skeleton-only file. All other empty-steps shapes remain allowed:
+
+| `steps.length` | `dry_run`     | `output_path` | Result                                                  |
+| -------------- | ------------- | ------------- | ------------------------------------------------------- |
+| 0              | `true`        | any           | Allowed — preserves skeleton inspection / IDE preview   |
+| 0              | `false`       | absent        | Allowed — no file would be written anyway               |
+| 0              | `false`       | **present**   | **Rejected** with `STEPS_REQUIRED` (no file is written) |
+| ≥ 1            | true or false | any           | Allowed — normal happy path                             |
+
+`details.suggestion` instructs the caller to pass the FULL step tree in a single call, clarifies that `provar_testcase_step_edit` is for amendment-only, and notes the `dry_run=true` escape hatch for skeleton inspection.
 
 ---
 

diff --git a/scripts/mcp-smoke.cjs b/scripts/mcp-smoke.cjs
@@ -177,6 +177,19 @@ async function runTests() {
       dry_run: true,
     });
 
+  // ── 6b. provar_testcase_generate STEPS_REQUIRED runtime guard (PDX-483) ───
+  // Drives the rejected shape (steps:[] + dry_run:false + output_path) so the
+  // PDX-479 regression-class shape is exercised on every smoke run. The smoke
+  // framework counts any JSON-RPC response as PASS; the assertion that the
+  // body carries error_code='STEPS_REQUIRED' lives in scripts/pdx-482-validate.cjs.
+  if (inGroup('authoring'))
+    await callTool('provar_testcase_generate', {
+      test_case_name: 'PDX-483 Guard Smoke',
+      steps: [],
+      dry_run: false,
+      output_path: path.join(TMP, 'pdx483-smoke-rejected.testcase'),
+    });
+
   // ── 7. provar_testcase_validate ───────────────────────────────────────────
   if (inGroup('validation')) await callTool('provar_testcase_validate', { content: '<testCase/>' });
 

diff --git a/scripts/pdx-482-validate.cjs b/scripts/pdx-482-validate.cjs
@@ -1,16 +1,26 @@
-// PDX-482 validation: confirm the construct/amend contract is reachable at the
-// MCP protocol surface in BOTH standard and compact schema modes.
+// PDX-482 / PDX-483 validation: confirm the construct/amend contract is reachable
+// at the MCP protocol surface and that the PDX-483 runtime guard rejects the
+// PDX-479 multi-call pattern shape.
 //
-// The LLM reads tools/list before every tool call, so every assertion here is
-// on bytes the LLM literally sees at the call site. Compact mode coverage is
-// critical because the adversarial review identified that PROVAR_MCP_SCHEMA_MODE=compact
-// silently swapped the description for a contract-free one-liner.
+// PDX-482 (standard + compact modes): assertions on tools/list — every byte the
+// LLM literally sees at the call site. Compact mode coverage is critical because
+// the adversarial review identified that PROVAR_MCP_SCHEMA_MODE=compact silently
+// swapped the description for a contract-free one-liner.
+//
+// PDX-483 (runtime-guard mode): drives a real tools/call with the rejected shape
+// (steps:[]+dry_run:false+output_path) and asserts the response is a structured
+// STEPS_REQUIRED error with a non-empty details.suggestion. This catches a
+// regression class that the tools/list assertions cannot reach: the passive
+// contract surviving in the description while the active guard silently
+// regresses (e.g. a refactor reorders the handler so writes happen before the
+// check).
 //
 //   yarn compile
 //   node scripts/pdx-482-validate.cjs
 
 'use strict';
 
+const fs = require('fs');
 const { spawn } = require('child_process');
 const os = require('os');
 const path = require('path');
@@ -238,13 +248,156 @@ function compactAssertions(toolList, record) {
   }
 }
 
+// ── PDX-483 runtime guard: tools/call assertion ─────────────────────────────
+// Drives a real tools/call(provar_testcase_generate, ...) with the rejected
+// shape (steps:[] + dry_run:false + output_path) and asserts the response is
+// a structured STEPS_REQUIRED error. This is the only check that catches a
+// silent regression where the passive description survives but the active
+// runtime guard is removed or reordered after a side effect.
+function runRuntimeGuardValidation() {
+  return new Promise((resolve, reject) => {
+    const server = spawn(process.execPath, [entry, 'mcp', 'start', '--allowed-paths', TMP, '--no-update-check'], {
+      stdio: ['pipe', 'pipe', 'inherit'],
+      env: { ...process.env },
+    });
+
+    let nextId = 1;
+    const pending = new Map();
+    let buf = '';
+
+    server.stdout.on('data', (chunk) => {
+      buf += chunk.toString('utf-8');
+      let nl;
+      while ((nl = buf.indexOf('\n')) !== -1) {
+        const line = buf.slice(0, nl).trim();
+        buf = buf.slice(nl + 1);
+        if (!line) continue;
+        try {
+          const msg = JSON.parse(line);
+          const cb = pending.get(msg.id);
+          if (cb) {
+            pending.delete(msg.id);
+            cb(msg);
+          }
+        } catch {
+          /* ignore */
+        }
+      }
+    });
+
+    const rpc = (method, params) => {
+      const id = nextId++;
+      const req = JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n';
+      return new Promise((rpcResolve, rpcReject) => {
+        pending.set(id, rpcResolve);
+        setTimeout(() => {
+          if (pending.has(id)) {
+            pending.delete(id);
+            rpcReject(new Error(`Timeout waiting for ${method}`));
+          }
+        }, 10000);
+        server.stdin.write(req);
+      });
+    };
+
+    const results = [];
+    const record = (label, ok, detail) => {
+      results.push({ label: `[runtime-guard] ${label}`, ok, detail });
+    };
+
+    (async () => {
+      await rpc('initialize', {
+        protocolVersion: '2024-11-05',
+        capabilities: {},
+        clientInfo: { name: 'pdx-483-validate', version: '1.0.0' },
+      });
+
+      // Use a unique tmp path so a leftover file from a prior run can't mask the assertion.
+      const outPath = path.join(TMP, `pdx483-validate-${Date.now()}.testcase`);
+      try {
+        if (fs.existsSync(outPath)) fs.unlinkSync(outPath);
+      } catch {
+        /* best-effort */
+      }
+
+      const callRes = await rpc('tools/call', {
+        name: 'provar_testcase_generate',
+        arguments: {
+          test_case_name: 'PDX-483 validate',
+          steps: [],
+          dry_run: false,
+          output_path: outPath,
+        },
+      });
+
+      // MCP tools/call returns { result: { content: [{ type, text }], isError? } }.
+      // The tool's error body is JSON-encoded in content[0].text.
+      const result = callRes.result;
+      record(
+        'tools/call returned a result (no protocol-level error)',
+        !!result && !callRes.error,
+        callRes.error ? JSON.stringify(callRes.error).slice(0, 120) : 'protocol OK'
+      );
+      record(
+        'result.isError === true (tool-level rejection)',
+        result?.isError === true,
+        `isError: ${String(result?.isError)} — rejection must surface at content level`
+      );
+
+      let body = null;
+      try {
+        body = JSON.parse(result?.content?.[0]?.text ?? '{}');
+      } catch (parseErr) {
+        record('content[0].text parses as JSON', false, parseErr.message);
+      }
+      record(
+        'error_code === "STEPS_REQUIRED"',
+        body?.error_code === 'STEPS_REQUIRED',
+        `error_code: ${body?.error_code} — must match the documented code from docs/mcp.md`
+      );
+      record(
+        'retryable === false',
+        body?.retryable === false,
+        'STEPS_REQUIRED is a contract violation — retrying with the same payload would never succeed'
+      );
+      record(
+        'details.suggestion is a non-empty string',
+        typeof body?.details?.suggestion === 'string' && body.details.suggestion.length > 0,
+        'details.suggestion must tell the LLM how to self-correct (canonical multi-call rejection text)'
+      );
+      record(
+        'details.suggestion mentions "FULL step tree"',
+        typeof body?.details?.suggestion === 'string' && body.details.suggestion.includes('FULL step tree'),
+        'suggestion must point the LLM at the single-call pattern'
+      );
+      record(
+        'details.suggestion mentions dry_run=true escape hatch',
+        typeof body?.details?.suggestion === 'string' && body.details.suggestion.includes('dry_run=true'),
+        'suggestion must mention dry_run=true for legitimate skeleton-inspection callers'
+      );
+      record(
+        'no file written at output_path (zero side effects)',
+        !fs.existsSync(outPath),
+        'STEPS_REQUIRED must run BEFORE fs.writeFileSync — no skeleton on disk'
+      );
+
+      server.stdin.end();
+      resolve(results);
+    })().catch((err) => {
+      server.kill();
+      reject(err);
+    });
+  });
+}
+
 (async () => {
   const standardResults = await runValidation('standard', {}, standardAssertions);
   // Explicitly null out the env var on the standard pass to ensure no leakage.
   // For compact, set PROVAR_MCP_SCHEMA_MODE=compact via the spawn env.
   const compactResults = await runValidation('compact', { PROVAR_MCP_SCHEMA_MODE: 'compact' }, compactAssertions);
+  const runtimeGuardResults = await runRuntimeGuardValidation();
 
-  const allResults = [...standardResults, ...compactResults];
+  const allResults = [...standardResults, ...compactResults, ...runtimeGuardResults];
 
   let pass = 0;
   let fail = 0;
@@ -256,7 +409,7 @@ function compactAssertions(toolList, record) {
       fail++;
     }
   }
-  console.log(`\nPDX-482 validation: ${pass} passed, ${fail} failed`);
+  console.log(`\nPDX-482/PDX-483 validation: ${pass} passed, ${fail} failed`);
   process.exit(fail > 0 ? 1 : 0);
 })().catch((err) => {
   console.error('Validation script error:', err);

diff --git a/src/mcp/tools/testCaseGenerate.ts b/src/mcp/tools/testCaseGenerate.ts
@@ -256,6 +256,32 @@ export function registerTestCaseGenerate(server: McpServer, config: ServerConfig
         target_uri: input.target_uri,
       });
 
+      // PDX-483: active runtime guard for the PDX-479 regression pattern.
+      // Rejects the exact shape that produces a contract-violating skeleton on
+      // disk: empty steps[] + non-dry-run + persistence target. Other empty-
+      // steps shapes (dry_run preview, no output_path) remain allowed.
+      if (input.steps.length === 0 && !input.dry_run && input.output_path) {
+        const err = makeError(
+          'STEPS_REQUIRED',
+          'provar_testcase_generate was called with an empty steps[] array and a target output_path. ' +
+            'This produces a contract-violating skeleton (the PDX-479 regression pattern) and is rejected.',
+          requestId,
+          false,
+          {
+            suggestion:
+              'Pass the FULL step tree to provar_testcase_generate in a single call. ' +
+              'provar_testcase_step_edit is for amending an already-validated test case ' +
+              '(single-step add, attribute fix, debug edit), not for constructing one from scratch. ' +
+              'If you genuinely want a skeleton for inspection, set dry_run=true.',
+          }
+        );
+        log('warn', 'provar_testcase_generate: STEPS_REQUIRED', {
+          requestId,
+          output_path: input.output_path,
+        });
+        return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(err) }] };
+      }
+
       try {
         const xmlContent = buildTestCaseXml(input);
         const filePath: string | undefined = input.output_path ? path.resolve(input.output_path) : undefined;