ProvarTesting · mrdailey99 · May 15, 2026 · May 15, 2026
diff --git a/package.json b/package.json
@@ -158,6 +158,7 @@
         "src/mcp/rules/*.json",
         "docs/NITROX_COMPONENT_CATALOG.md",
         "docs/NITROX_CATALOG_SOURCE.json",
+        "docs/PROVAR_TOOL_GUIDE.md",
         "**/tsconfig.json",
         "messages/**"
       ],

diff --git a/src/mcp/tools/projectValidateFromPath.ts b/src/mcp/tools/projectValidateFromPath.ts
@@ -43,6 +43,30 @@ interface ViolationSummary {
   sample_message: string;
 }
 
+function countAllProjectViolations(result: ProjectValidationResult): number {
+  // Note: ValidatedTestCase does not carry best_practices_violations at the project
+  // layer (intentional — bp surfaces via the testcase tool). The count here covers
+  // every violation visible at project/plan/suite/child_suite level plus per-tc
+  // structural issues, which is what the stop-decision safety hedge needs.
+  let total = result.project_violations.length;
+  for (const plan of result.plans) {
+    total += plan.violations.length;
+    for (const suite of plan.suites) {
+      total += suite.violations.length;
+      for (const tc of suite.test_cases) {
+        total += tc.issues.length;
+      }
+      for (const cs of suite.child_suites) {
+        total += cs.violations.length;
+      }
+    }
+    for (const utc of plan.unplanned_test_cases) {
+      total += utc.issues.length;
+    }
+  }
+  return total;
+}
+
 function buildPlanSummary(plan: ValidatedPlan): PlanSummary {
   const test_case_count = plan.suites.reduce((n, s) => n + s.test_cases.length, 0) + plan.unplanned_test_cases.length;
   return {
@@ -287,14 +311,17 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve
         }
 
         const currentViolations = result.project_violations as unknown as DiffableViolation[];
+        const allLevelViolationCount = countAllProjectViolations(result);
 
-        // Load baseline BEFORE saving to prevent eviction of the requested baseline
+        // Read baseline and history regardless of save_results — save_results controls
+        // whether the CURRENT run is persisted, not whether existing runs can be read.
+        // Load baseline BEFORE saving to prevent eviction of the requested baseline.
         const baseline =
-          save_results !== false && baseline_run_id !== undefined && baseline_run_id !== ''
+          baseline_run_id !== undefined && baseline_run_id !== ''
             ? loadBaselineViolations(storageDir, baseline_run_id)
             : null;
 
-        const hasBaseline = save_results !== false ? hasAnyRun(storageDir) : false;
+        const hasBaseline = hasAnyRun(storageDir);
 
         if (save_results !== false) {
           try {
@@ -324,7 +351,7 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve
             result.summary.test_cases_valid,
             result.summary.total_test_cases
           );
-          const recommended_next_action = calcNextAction(completeness_score, true, currentViolations.length);
+          const recommended_next_action = calcNextAction(completeness_score, true, allLevelViolationCount);
           const diffResponse = {
             requestId,
             ...(save_results !== false ? { run_id: runId } : {}),
@@ -342,7 +369,7 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve
           result.summary.test_cases_valid,
           result.summary.total_test_cases
         );
-        const recommended_next_action = calcNextAction(completeness_score, hasBaseline, currentViolations.length);
+        const recommended_next_action = calcNextAction(completeness_score, hasBaseline, allLevelViolationCount);
 
         const usePlanDetails = include_plan_details || detail === 'full';
         const shaped = shapeResponse(result, usePlanDetails, max_uncovered, max_violations);

diff --git a/src/mcp/tools/testPlanValidate.ts b/src/mcp/tools/testPlanValidate.ts
@@ -12,9 +12,37 @@ import { makeError, makeRequestId } from '../schemas/common.js';
 import { log } from '../logging/logger.js';
 import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js';
 import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js';
-import { validatePlan, buildHierarchySummary, type TestPlanInput } from './hierarchyValidate.js';
+import {
+  validatePlan,
+  buildHierarchySummary,
+  type TestPlanInput,
+  type PlanResult,
+  type SuiteResult,
+} from './hierarchyValidate.js';
 import { desc } from './descHelper.js';
 
+function countSuiteViolations(suite: SuiteResult): number {
+  let total = suite.violations.length;
+  for (const tc of suite.test_cases) {
+    total += tc.issues.length + tc.best_practices_violations.length;
+  }
+  for (const child of suite.test_suites) {
+    total += countSuiteViolations(child);
+  }
+  return total;
+}
+
+function countAllPlanViolations(result: PlanResult): number {
+  let total = result.violations.length;
+  for (const suite of result.test_suites) {
+    total += countSuiteViolations(suite);
+  }
+  for (const tc of result.test_cases) {
+    total += tc.issues.length + tc.best_practices_violations.length;
+  }
+  return total;
+}
+
 // ── Zod schemas ───────────────────────────────────────────────────────────────
 
 const testCaseSchema = z
@@ -146,7 +174,8 @@ export function registerTestPlanValidate(server: McpServer): void {
         const summary = buildHierarchySummary(result);
 
         const completeness_score = calcCompletenessScore(summary.test_cases_valid, summary.total_test_cases);
-        const recommended_next_action = calcNextAction(completeness_score, false);
+        const remainingViolations = countAllPlanViolations(result);
+        const recommended_next_action = calcNextAction(completeness_score, false, remainingViolations);
 
         const response = {
           requestId,

diff --git a/src/mcp/tools/testSuiteValidate.ts b/src/mcp/tools/testSuiteValidate.ts
@@ -29,6 +29,7 @@ function collectAllViolations(result: SuiteResult): DiffableViolation[] {
   const all: DiffableViolation[] = [...(result.violations as unknown as DiffableViolation[])];
   for (const tc of result.test_cases) {
     all.push(...(tc.issues as unknown as DiffableViolation[]));
+    all.push(...(tc.best_practices_violations as unknown as DiffableViolation[]));
   }
   for (const child of result.test_suites) {
     all.push(...collectAllViolations(child));

diff --git a/test/unit/mcp/projectValidateFromPath.test.ts b/test/unit/mcp/projectValidateFromPath.test.ts
@@ -535,5 +535,52 @@ describe('provar_project_validate (from path)', () => {
       assert.ok('completeness_score' in diffBody, 'diff should include completeness_score');
       assert.ok('recommended_next_action' in diffBody, 'diff should include recommended_next_action');
     });
+
+    it('returns diff (not BASELINE_NOT_FOUND) when save_results=false and baseline_run_id is set (B4)', () => {
+      // Read-only diff: callers must be able to compare against an existing
+      // baseline without persisting the current run. The pre-fix gated baseline
+      // load on save_results !== false, so a valid baseline returned BASELINE_NOT_FOUND.
+      makeProject(tmpDir);
+      const first = server.call('provar_project_validate', { project_path: tmpDir });
+      const runId = (parseText(first) as { run_id: string }).run_id;
+
+      const second = server.call('provar_project_validate', {
+        project_path: tmpDir,
+        baseline_run_id: runId,
+        save_results: false,
+      });
+      assert.equal(isError(second), false, 'read-only diff must not error');
+      const body = parseText(second);
+      assert.ok('added' in body, 'read-only diff must include added');
+      assert.ok('resolved' in body, 'read-only diff must include resolved');
+      assert.ok('unchanged_count' in body, 'read-only diff must include unchanged_count');
+      assert.ok(!('run_id' in body), 'read-only diff should NOT include run_id when save_results=false');
+    });
+  });
+
+  describe('PDX-473 — stop decision counts all-level violations (B3)', () => {
+    it('recommended_next_action is NOT stop when nested violations remain at completeness 100', () => {
+      // The fixture project (makeProject) creates a structurally valid test case
+      // covered by a plan, yielding test_cases_valid===total. But the project
+      // typically has plan/suite-level violations (e.g. missing plan metadata
+      // from the bare .planitem). The stop decision must reflect those.
+      makeProject(tmpDir);
+      const result = server.call('provar_project_validate', {
+        project_path: tmpDir,
+        save_results: false,
+      });
+      assert.equal(isError(result), false);
+      const body = parseText(result);
+      if (body['completeness_score'] === 100) {
+        // If the fixture happens to be 100% complete in completeness terms, the
+        // stop decision must still account for any nested violations that the
+        // pre-fix snapshot ignored.
+        assert.notEqual(
+          body['recommended_next_action'],
+          'stop',
+          `Expected NOT stop while nested violations remain, got: ${String(body['recommended_next_action'])}`
+        );
+      }
+    });
   });
 });
diff --git a/test/unit/mcp/testPlanValidate.test.ts b/test/unit/mcp/testPlanValidate.test.ts
@@ -422,7 +422,12 @@ describe('provar_testplan_validate', () => {
       assert.ok(valid.includes(body['recommended_next_action'] as string));
     });
 
-    it('recommended_next_action is stop when all test cases are valid (score=100)', () => {
+    it('recommended_next_action is NOT stop when test cases are structurally valid but BP violations remain (B1)', () => {
+      // TC_VALID parses as structurally valid (issues=0) but has BP violations
+      // (e.g. STRUCT-SUMMARY-001 — no <summary> tag). With fullMeta() the plan
+      // itself has no PLAN-META-* violations. The stop-decision safety hedge
+      // must include the nested per-test-case BP violations, so the action
+      // must NOT be 'stop' until those are resolved.
       const result = server.call('provar_testplan_validate', {
         plan_name: 'AllValidPlan',
         test_suites: [SUITE_VALID],
@@ -431,7 +436,32 @@ describe('provar_testplan_validate', () => {
 
       const body = parseText(result);
       assert.equal(body['completeness_score'], 100);
-      assert.equal(body['recommended_next_action'], 'stop');
+      assert.notEqual(
+        body['recommended_next_action'],
+        'stop',
+        `Expected NOT stop while BP violations remain, got: ${String(body['recommended_next_action'])}`
+      );
+    });
+
+    it('recommended_next_action is NOT stop when score=100 but plan metadata violations remain (B1)', () => {
+      // Same TC_VALID as above (structurally valid → completeness=100), but
+      // plan metadata is OMITTED, which triggers PLAN-META-* violations at the
+      // plan level. The old impl passed (score, false) to calcNextAction with
+      // a default remainingViolationCount=0, so stop fired despite plan
+      // violations. The fix collects plan/suite/tc/bp counts.
+      const result = server.call('provar_testplan_validate', {
+        plan_name: 'MissingMetaPlan',
+        test_suites: [SUITE_VALID],
+        // metadata intentionally omitted → PLAN-META-001..007 fire
+      });
+
+      const body = parseText(result);
+      assert.equal(body['completeness_score'], 100);
+      assert.notEqual(
+        body['recommended_next_action'],
+        'stop',
+        `Expected NOT stop while plan metadata violations remain, got: ${String(body['recommended_next_action'])}`
+      );
     });
 
     it('recommended_next_action is inspect_failures when plan has failures (no baseline)', () => {

diff --git a/test/unit/mcp/testSuiteValidate.test.ts b/test/unit/mcp/testSuiteValidate.test.ts
@@ -425,14 +425,22 @@ describe('provar_testsuite_validate', () => {
       assert.ok(['stop', 'inspect_failures', 'fix_and_revalidate'].includes(action), `Unexpected action: ${action}`);
     });
 
-    it('recommended_next_action is "stop" when completeness_score is 100', () => {
+    it('recommended_next_action is NOT "stop" when test cases have BP violations (B2)', () => {
+      // TC_VALID is structurally valid (issues.length=0) but has BP violations
+      // (e.g. STRUCT-SUMMARY-001 — no <summary> tag). collectAllViolations must
+      // include tc.best_practices_violations so the stop-decision safety hedge
+      // sees the remaining work; otherwise stop fires while BP issues remain.
       const result = server.call('provar_testsuite_validate', {
         suite_name: 'StopSuite',
         test_cases: [TC_VALID],
       });
       const body = parseText(result);
       assert.equal(body['completeness_score'], 100);
-      assert.equal(body['recommended_next_action'], 'stop');
+      assert.notEqual(
+        body['recommended_next_action'],
+        'stop',
+        `Expected NOT stop while BP violations remain, got: ${String(body['recommended_next_action'])}`
+      );
     });
   });