diff --git a/.github/workflows/DeployManual.yml b/.github/workflows/DeployManual.yml index 8fe8456e..f8a31e0e 100644 --- a/.github/workflows/DeployManual.yml +++ b/.github/workflows/DeployManual.yml @@ -24,6 +24,13 @@ jobs: registry-url: 'https://registry.npmjs.org' scope: '@provartesting' - name: Install dependencies and build + env: + # Required by scripts/fetch-nitrox-packages.cjs to pull the latest + # NitroX schemas and component catalog from ProvarTesting/factPackages + # at release time. Without this, prepack falls back to the bundled + # snapshots and ships them as-is. The auto-provided GITHUB_TOKEN has + # read access to factPackages. + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | npm install -g @salesforce/cli yarn @@ -75,23 +82,27 @@ jobs: if: success() env: RELEASE_BODY: ${{ github.event.release.body }} + GH_TOKEN: ${{ github.token }} run: | VERSION=$(node -p "require('./package.json').version") TAG="${{ github.event.inputs.tag || 'latest' }}" # --- Determine change notes source --- if [ -n "$RELEASE_BODY" ]; then - # GitHub Release body provided — use it verbatim + # GitHub Release body provided via release event — use it verbatim NOTES="$RELEASE_BODY" else - # Auto-extract from git log since the previous tag + # workflow_dispatch: fetch release notes from the GitHub release for this version + NOTES=$(gh api "repos/${{ github.repository }}/releases/tags/v${VERSION}" --jq '.body' 2>/dev/null || echo "") + fi + + if [ -z "$NOTES" ]; then + # Final fallback: auto-extract from git log since the previous tag if [ "${{ github.event_name }}" = "release" ]; then - # Release event: HEAD is the new tag — find the nearest ancestor tag before it PREV=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || git tag --sort=-version:refname | head -1) else - # Manual dispatch: find the nearest ancestor tag from HEAD - # (git describe respects branch ancestry; avoids pulling in commits from sibling branches) - PREV=$(git describe --tags --abbrev=0 HEAD 2>/dev/null || 
true) + # HEAD^ avoids returning the current tag itself when HEAD is exactly at a tag + PREV=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || true) fi RANGE="${PREV:+${PREV}..}HEAD" diff --git a/CLAUDE.md b/CLAUDE.md index 9251476c..0559d948 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -104,3 +104,16 @@ The project uses ESLint with `@typescript-eslint` strict rules. Common gotchas: - `camelcase` — `nitroX` is valid camelCase (capital X starts the next word) CI runs lint as part of `sf-prepack` — do not skip with `--no-verify` on the final merge commit. + +--- + +## Script naming convention + +Files anywhere under `scripts/` (including nested subdirectories) must be named for what they **do**, not for the ticket that prompted them. Ticket-prefixed names (e.g. `pdx-482-validate.cjs`) leak internal Jira plumbing into the file tree, age poorly once the ticket closes, and surface in customer-visible artifacts (CI logs, PR diffs, repo browsing). + +- **Allowed:** `authoring-flow-trace.cjs`, `construction-contract-validate.cjs`, `mcp-smoke.cjs`, `fetch-nitrox-packages.cjs` +- **Rejected:** `pdx-482-validate.cjs`, `PDX_481_trace.cjs`, `scripts/tmp/pdx-999.cjs`, anything whose basename matches `^pdx[-_]?\d+` (case-insensitive) + +Enforced by `scripts/lint-script-names.cjs`, which walks `scripts/` recursively and runs as a dependency of `yarn lint` (wireit `lint:script-names`). The check fails the lint step if any ticket-prefixed filename appears at any depth. + +Ticket IDs and rationale belong in commit messages and PR descriptions, not in filenames or in user-facing docs (`docs/mcp.md`, `docs/mcp-pilot-guide.md`). 
diff --git a/docs/PROVAR_TOOL_GUIDE.md b/docs/PROVAR_TOOL_GUIDE.md index 651e1bc7..1f62a823 100644 --- a/docs/PROVAR_TOOL_GUIDE.md +++ b/docs/PROVAR_TOOL_GUIDE.md @@ -77,15 +77,27 @@ provar_properties_set { file_path: "", key: "connectionName", valu ## "I want to write a new test" +A Provar test case is a tree (scenarios → UI screens → asserts), not a flat list of steps. The agent that calls `provar_testcase_generate` is responsible for constructing the full tree in **one** call. Splitting authoring across many tool calls causes scenario numbering drift, flat asserts, and inconsistent step types — `provar_testcase_step_edit` is for **amending** an existing test case, not for **constructing** one. + +Recommended sequence: + ``` -1. provar_project_inspect { project_path } ← find coverage gaps first -2. provar_testcase_generate { project_path, name, ... } -3. provar_testcase_step_edit { test_case_path, ... } ← repeat per step -4. provar_testcase_validate { file_path } ← must pass before adding to plan -5. provar_testplan_add-instance { project_path, plan_name, test_case_path } -6. provar_testplan_validate { project_path, plan_name } +1. provar_project_inspect { project_path } ← find coverage gaps first +2. provar_qualityhub_examples_retrieve { object_or_scenario } ← ground in corpus examples for the step types you need +3. provar_testcase_generate { test_case_name, steps: [ ...full step tree... ] } ← single call, full step tree in one payload +4. provar_testcase_validate { file_path } ← must pass before adding to plan +5. provar_testplan_add-instance { project_path, plan_name, test_case_path } +6. 
provar_testplan_validate { project_path, plan_name } ``` +Use `provar_testcase_step_edit` only when: + +- Adding a single step to an existing, already-validated test case +- Fixing a step's attributes after a validation finding +- Targeted edits during debugging + +Do **not** use `provar_testcase_step_edit` to construct a test case step-by-step from an empty skeleton — the LLM loses scenario context between calls and the resulting structure is unreliable. + --- ## "I want to work with Salesforce metadata" diff --git a/docs/mcp-pilot-guide.md b/docs/mcp-pilot-guide.md index c5d2085b..bf7e0073 100644 --- a/docs/mcp-pilot-guide.md +++ b/docs/mcp-pilot-guide.md @@ -439,6 +439,50 @@ NitroX is Provar's Hybrid Model for locators — it maps Salesforce component-ba --- +### Scenario 12: Construct a Multi-Scenario Test Case in a Single Call + +**Goal:** Confirm the AI authors a multi-scenario test case by passing the full step tree to `provar_testcase_generate` in **one** call — not by generating an empty skeleton and looping `provar_testcase_step_edit` per step. + +**Background:** A previously observed regression traced to authoring guidance that steered LLMs toward a per-step construction pattern. Multi-call construction drops scenario numbers (e.g. Scenario 1 → Scenario 3, no Scenario 2), flattens asserts that should be nested inside `UiWithScreen` clauses, and produces inconsistent assert API IDs across the case. This scenario exists so the regression class is exercised in pilot evaluation and cannot recur silently. + +**Defense in depth.** Three layers protect against the multi-call construction pattern: + +1. **Prompt and resource guidance** — authoring prompts and the MCP step-reference resource describe single-call construction as the contract. +2. 
**Tool-description contract** — `provar_testcase_generate` and `provar_testcase_step_edit` descriptions explicitly mark generate as constructor-only and step_edit as amendment-only, so the LLM reads the contract at every call site (including compact schema mode). +3. **Runtime guard** — `provar_testcase_generate` rejects the exact shape that would produce a skeleton-only file: `steps:[]` + `dry_run:false` + `output_path`. The rejection returns `STEPS_REQUIRED` with `details.suggestion` telling the LLM to pass the full step tree in one call. Empty-steps shapes that don't write a file (dry-run preview, no `output_path`) remain allowed. + +If a pilot LLM falls into the multi-call pattern despite the description contract, the runtime guard converts the failure into an actionable error rather than a silently broken file on disk. + +**Title-level contract:** the chip-level `title` fields for the two tools — `Generate Test Case (full steps in one call)` and `Amend Existing Test Case Step` — carry the construct-vs-amend split at the tool-picker surface. MCP clients that render only the title (Claude Desktop tool-picker chips, Cursor audit pane, inline tool-call references in chat threads) still expose the contract to the agent before any description is read. + +**Prompt:** + +> "Create a Provar test case `AccountFlow.testcase` that covers three scenarios: +> +> 1. **Create Account** — navigate to the Account home, click New, set Name = `{AccountName}` and Phone = `{AccountPhone}`, click Save +> 2. **Verify Account on List** — navigate back to the Account list view, assert the Name and Phone values +> 3. **Open Account Detail** — open the just-created Account, assert all saved field values +> +> Use UI On Screen wrappers, AssertValues for value assertions, and reference SetValues variables with `{Name}`. Write to `/tests/AccountFlow.testcase`." 
+ +**What to look for (PASS):** + +- Exactly **one** call to `provar_testcase_generate` with a populated `steps[]` array — not a call with `steps: []` followed by N `step_edit` calls +- The generated XML lists three scenarios numbered consecutively (1, 2, 3 — no skipped numbers) +- Each scenario's UI actions and asserts are nested inside the appropriate `UiWithScreen` clause (or its equivalent grouping element) — not flat siblings at the top level of the test case +- Assert step types are consistent across the case (e.g. all `AssertValues`, not mixed `AssertValues` + `UiAssert` for the same purpose) +- `provar_testcase_validate` on the result returns `is_valid: true` + +**What to look for (FAIL — regression indicator):** + +- Two or more calls to `provar_testcase_generate` for the same file +- A call to `provar_testcase_generate` with `steps: []` followed by `provar_testcase_step_edit` calls +- The generated case skips a scenario number, mixes assert API IDs for similar assertions, or emits asserts as flat siblings rather than nested inside the screen wrapper + +If any FAIL indicator appears, report it to the Provar team with the prompt and the generated XML attached. 
+ +--- + ## Security Model ### What the server does diff --git a/docs/mcp.md b/docs/mcp.md index 592d8f4e..33d23421 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -77,6 +77,7 @@ The Provar DX CLI ships with a built-in **Model Context Protocol (MCP) server** - [AI loop pattern](#ai-loop-pattern) - [Quality scores explained](#quality-scores-explained) - [API compatibility — `xml` vs `xml_content`](#api-compatibility--xml-vs-xml_content) +- [Agent performance tuning](#agent-performance-tuning) --- @@ -462,10 +463,61 @@ sf provar auth clear ### Environment variables -| Variable | Purpose | Default | -| ------------------------ | ------------------------------------- | ------------------------------------------------- | -| `PROVAR_API_KEY` | API key for Quality Hub validation | None — falls back to `~/.provar/credentials.json` | -| `PROVAR_QUALITY_HUB_URL` | Override the Quality Hub API base URL | Dev API Gateway URL (`/dev`) | +| Variable | Purpose | Default | +| ------------------------ | ---------------------------------------------------------- | ------------------------------------------------- | +| `PROVAR_API_KEY` | API key for Quality Hub validation | None — falls back to `~/.provar/credentials.json` | +| `PROVAR_QUALITY_HUB_URL` | Override the Quality Hub API base URL | Dev API Gateway URL (`/dev`) | +| `PROVAR_MCP_SCHEMA_MODE` | Set to `compact` to shorten all tool descriptions | Standard (full) descriptions | +| `PROVAR_MCP_TOOLS` | Comma-separated list of tool groups to register at startup | All groups registered | + +--- + +## Agent performance tuning + +Two environment variables let you reduce the context budget consumed by the ProvarDX MCP server — useful when working with agents that have a limited context window or a large number of registered tools. + +### Compact descriptions (`PROVAR_MCP_SCHEMA_MODE`) + +``` +PROVAR_MCP_SCHEMA_MODE=compact +``` + +When set to `compact`, most tool and parameter descriptions are replaced with short summaries (typically ≤15 words). 
This can save hundreds of tokens per tool in the initial context handshake, at the cost of reduced in-description guidance for the agent. + +Use this mode if: + +- Your agent reports context limit warnings on startup +- You are using a smaller model with a tighter context budget +- Your agents already have domain context and don't need verbose descriptions + +### Tool group filtering (`PROVAR_MCP_TOOLS`) + +``` +PROVAR_MCP_TOOLS=nitrox,authoring +``` + +Restricts which tool groups are registered when the server starts. Only the groups listed (comma-separated, case-insensitive) are made available. `provardx_ping` is always registered regardless of this setting. + +| Group name | Tools registered | +| ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `nitrox` | `provar_nitrox_discover`, `provar_nitrox_generate`, `provar_nitrox_patch`, `provar_nitrox_read`, `provar_nitrox_validate` | +| `automation` | `provar_automation_setup`, `provar_automation_config_load`, `provar_automation_metadata_download`, `provar_automation_compile`, `provar_automation_testrun` | +| `qualityhub` | `provar_qualityhub_connect`, `provar_qualityhub_display`, `provar_qualityhub_testrun`, `provar_qualityhub_testrun_abort`, `provar_qualityhub_testrun_report`, `provar_qualityhub_examples_retrieve`, `provar_qualityhub_testcase_retrieve`, `provar_qualityhub_defect_create` | +| `validation` | `provar_project_validate`, `provar_ant_generate`, `provar_ant_validate`, `provar_properties_*`, `provar_testcase_validate`, `provar_testsuite_validate`, `provar_testplan_validate`, `provar_pageobject_validate` | +| `authoring` | `provar_testcase_generate`, `provar_pageobject_generate`, `provar_testcase_step_edit`, `provar_testplan_*` | +| `inspect` | `provar_project_inspect` | 
+| `connection` | `provar_connection_list` | +| `rca` | `provar_testrun_rca`, `provar_testrun_report_locate` | + +**Example — NitroX-only session:** + +```json +{ + "env": { + "PROVAR_MCP_TOOLS": "nitrox" + } +} +``` --- @@ -510,14 +562,17 @@ Inspects a Provar project folder and returns a structured inventory of all key p **Input** -| Parameter | Type | Required | Description | -| -------------- | ------ | -------- | ---------------------------------------- | -| `project_path` | string | yes | Absolute path to the Provar project root | +| Parameter | Type | Required | Description | +| -------------- | --------------------------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `project_path` | string | yes | Absolute path to the Provar project root | +| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"` returns only `requestId`, `project_path`, `provar_home`, and `summary`. `"standard"` (default) returns full inventory. `"full"` is identical to `"standard"` for this tool. | +| `fields` | string | no | Comma-separated top-level keys to retain (e.g. `"test_case_files,summary"`). Supports dot notation for nested filtering (e.g. `"test_project.connections"`). Unknown field names are silently ignored. Applied after the `detail` filter. 
| **Output** — JSON object containing: | Field | Description | | ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | +| `requestId` | Unique identifier for this request (always present, including in `detail="summary"` responses) | | `provar_home` | The Provar installation path, or `null` if not found | | `provar_home_source` | Where the value came from: `"PROVAR_HOME environment variable"`, `"provardx-properties.json ()"`, or `"ANT build file ()"` | | `provardx_properties_files` | Relative paths to any `provardx-properties.json` files found (ProvarDX CLI run configs) | @@ -556,9 +611,10 @@ Lists all connections and named environments defined in the project's `.testproj **Input** -| Parameter | Type | Required | Description | -| -------------- | ------ | -------- | ----------------------------------------------------------------- | -| `project_path` | string | yes | Absolute path to the Provar project root (within `allowed-paths`) | +| Parameter | Type | Required | Description | +| -------------- | ------ | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `project_path` | string | yes | Absolute path to the Provar project root (within `allowed-paths`) | +| `fields` | string | no | Comma-separated response keys to retain (e.g. `"connections,summary"`). Supports dot notation (e.g. `"connections.name,connections.type"`). Unknown fields are silently ignored. | **Output** @@ -647,6 +703,10 @@ Validates a Java Page Object source file against 30+ quality rules (structural c Generates an XML test case skeleton with UUID v4 guids and sequential `testItemId` values. 
+The tool's chip-level `title` — `Generate Test Case (full steps in one call)` — carries the construction contract so that MCP clients which render only the title (Claude Desktop tool-picker chips, Cursor audit pane, inline tool-call references) surface the single-call requirement to the agent before any description is read. + +> **Construction pattern (read first).** Pass the FULL step tree for the test case in a single call via the `steps[]` array. Do **not** call this tool with `steps: []` and then append steps via repeated `provar_testcase_step_edit` calls — that pattern drops scenarios, flattens nesting, and produces inconsistent step types. `provar_testcase_step_edit` is for **amending** an already-validated test case (single-step add, attribute fix, debug edit), not for **constructing** one from scratch. + **Generated `` element structure (Provar requirements):** ```xml @@ -707,10 +767,22 @@ AssertValues uses **flat** argument structure (`expectedValue`, `actualValue`, ` **Error codes** -| Code | Meaning | -| ------------------ | --------------------------------------------------------------------- | -| `TESTCASE_INVALID` | Generated XML failed structural validation (see `details.validation`) | -| `FILE_EXISTS` | `output_path` already exists and `overwrite=false` | +| Code | Meaning | +| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `TESTCASE_INVALID` | Generated XML failed structural validation (see `details.validation`) | +| `FILE_EXISTS` | `output_path` already exists and `overwrite=false` | +| `STEPS_REQUIRED` | Called with `steps:[]` + `dry_run:false` + `output_path` — constructing a test case requires the full step tree on the write path. `details.suggestion` tells the caller how to fix. | + +**`STEPS_REQUIRED`.** The rejected shape is `steps:[]` + `dry_run:false` + `output_path`. 
Constructing a test case requires the full step tree in a single call; passing an empty array on the write path would produce a skeleton-only file. All other empty-steps shapes remain allowed: + +| `steps.length` | `dry_run` | `output_path` | Result | +| -------------- | ------------- | ------------- | ------------------------------------------------------- | +| 0 | `true` | any | Allowed — preserves skeleton inspection / IDE preview | +| 0 | `false` | absent | Allowed — no file would be written anyway | +| 0 | `false` | **present** | **Rejected** with `STEPS_REQUIRED` (no file is written) | +| ≥ 1 | true or false | any | Allowed — normal happy path | + +`details.suggestion` instructs the caller to pass the FULL step tree in a single call, clarifies that `provar_testcase_step_edit` is for amendment-only, and notes the `dry_run=true` escape hatch for skeleton inspection. --- @@ -720,29 +792,34 @@ Validates an XML test case for schema correctness (validity score) and best prac **Input** -| Parameter | Type | Required | Description | -| ----------- | ------ | ------------------------------------------- | ---------------------------------------------- | -| `content` | string | one of `content`/`xml`/`file_path` required | XML content to validate (MCP field name) | -| `xml` | string | one of `content`/`xml`/`file_path` required | XML content to validate (API-compatible alias) | -| `file_path` | string | one of `content`/`xml`/`file_path` required | Path to the `.testcase` XML file | +| Parameter | Type | Required | Description | +| ----------------- | --------------------------------- | ------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `content` | string | one of `content`/`xml`/`file_path` required | XML content to validate (MCP field name) | +| `xml` | string | one of 
`content`/`xml`/`file_path` required | XML content to validate (API-compatible alias) | +| `file_path` | string | one of `content`/`xml`/`file_path` required | Path to the `.testcase` XML file | +| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: is_valid, scores, and stop signal only. `"standard"`/`"full"`: full issues list (default). | +| `baseline_run_id` | string | no | `run_id` from a previous call. Returns only new/resolved issues since that run (`{ added, resolved, unchanged_count, run_id }`). Returns `BASELINE_NOT_FOUND` if the run ID is unknown. | **Output** -| Field | Type | Description | -| -------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------ | -| `is_valid` | boolean | `true` if zero ERROR-level schema violations | -| `validity_score` | number (0–100) | Schema compliance score (100 − errorCount × 20) | -| `quality_score` | number (0–100) | Best-practices score (weighted deduction formula) | -| `error_count` | integer | Schema error count | -| `warning_count` | integer | Schema warning count | -| `step_count` | integer | Number of `` steps | -| `test_case_id` | string | Value of the `id` attribute | -| `test_case_name` | string | Value of the `name` attribute | -| `issues` | array | Schema issues with `rule_id`, `severity`, `message` | -| `best_practices_violations` | array | Best-practices violations with `rule_id`, `severity`, `weight`, `message` | -| `best_practices_rules_evaluated` | integer | How many best-practices rules were checked | -| `validation_source` | string | `quality_hub`, `local`, or `local_fallback` — see Authentication section | -| `validation_warning` | string | Present when `validation_source` is `local` (onboarding) or `local_fallback` (explains why API failed) | +| Field | Type | Description | +| -------------------------------- | -------------- | 
------------------------------------------------------------------------------------------------------------------------------ | +| `run_id` | string | Stable identifier for this validation run. Pass as `baseline_run_id` in the next call to receive only new/resolved issues. | +| `completeness_score` | number (0–1) | Ratio of valid test cases to total test cases validated (`0.0`–`1.0`). | +| `recommended_next_action` | string | `"stop"` (all passing), `"continue"` (issues remain), or `"escalate"` (no baseline yet — run without `baseline_run_id` first). | +| `is_valid` | boolean | `true` if zero ERROR-level schema violations | +| `validity_score` | number (0–100) | Schema compliance score (100 − errorCount × 20) | +| `quality_score` | number (0–100) | Best-practices score (weighted deduction formula) | +| `error_count` | integer | Schema error count | +| `warning_count` | integer | Schema warning count | +| `step_count` | integer | Number of `` steps | +| `test_case_id` | string | Value of the `id` attribute | +| `test_case_name` | string | Value of the `name` attribute | +| `issues` | array | Schema issues with `rule_id`, `severity`, `message` | +| `best_practices_violations` | array | Best-practices violations with `rule_id`, `severity`, `weight`, `message` | +| `best_practices_rules_evaluated` | integer | How many best-practices rules were checked | +| `validation_source` | string | `quality_hub`, `local`, or `local_fallback` — see Authentication section | +| `validation_warning` | string | Present when `validation_source` is `local` (onboarding) or `local_fallback` (explains why API failed) | **Key schema rules:** TC_001 (missing XML declaration), TC_002 (malformed XML), TC_003 (wrong root element), TC_010/011/012 (missing/invalid id/guid), TC_031 (invalid apiCall guid), TC_034/035 (non-integer testItemId). 
@@ -756,6 +833,15 @@ Validates an XML test case for schema correctness (validity score) and best prac - **VAR-REF-001** — An argument value looks like a variable reference (`{VarName}` or `{Obj.Field}`) but is stored as `class="value" valueClass="string"`. Provar will treat it as a literal string, not resolve the variable. Replace with `class="variable"` and `` elements. - **VAR-REF-002** — A `{VarName}` token is embedded inside a larger plain string (e.g. `SELECT Id FROM Account WHERE Id = '{AccountId}'`). Provar does not perform `{…}` interpolation in string values at runtime; the braces are emitted literally. Use `class="compound"` with `` children to split the literal text and variable references. In `provar_testcase_generate`, pass the value with `{VarName}` placeholders — the generator emits compound XML automatically. +**Error codes** + +| Code | Meaning | +| -------------------- | ------------------------------------------------------------------------------------------------- | +| `BASELINE_NOT_FOUND` | The `baseline_run_id` was not found. Run without `baseline_run_id` first to establish a baseline. | +| `VALIDATE_ERROR` | Unexpected validation error | +| `FILE_NOT_FOUND` | `file_path` does not exist | +| `PATH_NOT_ALLOWED` | `file_path` is outside the server's `--allowed-paths` | + --- ### `provar_testsuite_validate` @@ -764,15 +850,23 @@ Validates a Provar test suite — checks for empty suites, duplicate names (with **Input** -| Parameter | Type | Required | Description | -| ------------------- | -------------- | -------- | ------------------------------------------------------------------------------------------------------------ | -| `suite_name` | string | yes | Name of the test suite | -| `test_cases` | array | no | Test cases directly in this suite. Each item: `{ name, xml_content \| xml }` | -| `child_suites` | array | no | Child suites (up to 2 levels of nesting). Each item: `{ name, test_cases?, test_suites?, test_case_count? 
}` | -| `test_case_count` | integer | no | Override total count for the size check (useful when not sending full XML) | -| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be "valid" (default: 80) | +| Parameter | Type | Required | Description | +| ------------------- | --------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| `suite_name` | string | yes | Name of the test suite | +| `test_cases` | array | no | Test cases directly in this suite. Each item: `{ name, xml_content \| xml }` | +| `child_suites` | array | no | Child suites (up to 2 levels of nesting). Each item: `{ name, test_cases?, test_suites?, test_case_count? }` | +| `test_case_count` | integer | no | Override total count for the size check (useful when not sending full XML) | +| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be "valid" (default: 80) | +| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: name, scores, and stop signal only. `"standard"`/`"full"`: full violations and per-test-case results (default). | +| `baseline_run_id` | string | no | `run_id` from a previous call. Returns only new/resolved violations since that run. Returns `BASELINE_NOT_FOUND` if the run ID is unknown. | -**Output** — `{ name, level: "suite", quality_score, violations[], test_cases[], test_suites[], summary }` +**Output** — `{ run_id, completeness_score, recommended_next_action, name, level: "suite", quality_score, violations[], test_cases[], test_suites[], summary }` + +| Field | Type | Description | +| ------------------------- | ------------ | ------------------------------------------------------------------------------------------------------------------- | +| `run_id` | string | Stable identifier for this run. 
Pass as `baseline_run_id` in the next call to receive only new/resolved violations. | +| `completeness_score` | number (0–1) | Ratio of valid test cases to total (`0.0`–`1.0`). | +| `recommended_next_action` | string | `"stop"`, `"continue"`, or `"escalate"` — see [Quality scores explained](#quality-scores-explained). | **Violation rule IDs:** SUITE-EMPTY-001, SUITE-DUP-001, SUITE-DUP-002, SUITE-SIZE-001, SUITE-NAMING-001, SUITE-NAMING-002 @@ -784,14 +878,15 @@ Validates a Provar test plan — checks for empty plans, duplicate suite names, **Input** -| Parameter | Type | Required | Description | -| ------------------- | -------------- | -------- | --------------------------------------- | -| `plan_name` | string | yes | Name of the test plan | -| `test_suites` | array | no | Test suites in this plan | -| `test_cases` | array | no | Test cases directly in this plan | -| `test_suite_count` | integer | no | Override suite count for the size check | -| `metadata` | object | no | Plan completeness metadata (see below) | -| `quality_threshold` | number (0–100) | no | Minimum quality score (default: 80) | +| Parameter | Type | Required | Description | +| ------------------- | --------------------------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------- | +| `plan_name` | string | yes | Name of the test plan | +| `test_suites` | array | no | Test suites in this plan | +| `test_cases` | array | no | Test cases directly in this plan | +| `test_suite_count` | integer | no | Override suite count for the size check | +| `metadata` | object | no | Plan completeness metadata (see below) | +| `quality_threshold` | number (0–100) | no | Minimum quality score (default: 80) | +| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: name, scores, and stop signal only. `"standard"`/`"full"`: full violations and hierarchy results (default). 
| **`metadata` fields** @@ -806,7 +901,12 @@ Validates a Provar test plan — checks for empty plans, duplicate suite names, | `test_data_strategy` | How test data is prepared and cleaned up | | `risks` | Identified risks and mitigations | -**Output** — `{ name, level: "plan", quality_score, violations[], test_suites[], test_cases[], summary }` +**Output** — `{ completeness_score, recommended_next_action, name, level: "plan", quality_score, violations[], test_suites[], test_cases[], summary }` + +| Field | Type | Description | +| ------------------------- | ------------ | ---------------------------------------------------------------------------------------------------- | +| `completeness_score` | number (0–1) | Ratio of valid test cases to total (`0.0`–`1.0`). | +| `recommended_next_action` | string | `"stop"`, `"continue"`, or `"escalate"` — see [Quality scores explained](#quality-scores-explained). | **Violation rule IDs:** PLAN-EMPTY-001, PLAN-DUP-001, PLAN-SIZE-001, PLAN-NAMING-001, PLAN-META-001 through PLAN-META-007 @@ -820,27 +920,32 @@ Validates a Provar project directly from its directory on disk. 
Reads the plan/s **Input** -| Parameter | Type | Required | Description | -| ---------------------- | -------------- | -------- | -------------------------------------------------------------------------------------------------------------------- | -| `project_path` | string | yes | Absolute path to the Provar project root (directory containing `.testproject`) | -| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be considered valid (default: 80) | -| `save_results` | boolean | no | Write a QH-compatible JSON report to `{project_path}/provardx/validation/` (default: true) | -| `results_dir` | string | no | Override the output directory for the saved report (must be within `allowed-paths`) | -| `include_plan_details` | boolean | no | Include full per-suite and per-test-case data in the response (default: false — keep false to avoid token explosion) | -| `max_uncovered` | integer | no | Maximum uncovered test case paths to return (default: 20; set to `0` for none) | -| `max_violations` | integer | no | When `include_plan_details: true`, caps project violations returned (default: 50) | +| Parameter | Type | Required | Description | +| ---------------------- | --------------------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `project_path` | string | yes | Absolute path to the Provar project root (directory containing `.testproject`) | +| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be considered valid (default: 80) | +| `save_results` | boolean | no | Write a QH-compatible JSON report to `{project_path}/provardx/validation/` (default: true) | +| `results_dir` | string | no | Override the output directory for the saved report (must be within `allowed-paths`) | +| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. 
`"summary"`: key scores and stop signal only. `"standard"`: slim violation summary (default). `"full"`: full per-suite and per-test-case data. | +| `baseline_run_id` | string | no | `run_id` from a previous call. Returns only new/resolved project violations since that run. Returns `BASELINE_NOT_FOUND` if the run ID is unknown. Requires `save_results: true`. | +| `include_plan_details` | boolean | no | **@deprecated** — use `detail="full"` instead. Include full per-suite and per-test-case data (default: false). | +| `max_uncovered` | integer | no | **@deprecated** — response is automatically scoped by `detail` level. Maximum uncovered test case paths to return (default: 20). | +| `max_violations` | integer | no | **@deprecated** — response is automatically scoped by `detail` level. Caps project violations returned when `include_plan_details: true` (default: 50). | **Output** (slim mode, `include_plan_details: false`) -| Field | Description | -| ------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `quality_score` | Project quality score (0–100) | -| `coverage_percent` | Percentage of test cases covered by at least one plan | -| `violation_summary` | Map of `rule_id → count` for all violations found | -| `plan_scores` | Array of `{ name, quality_score }` per plan | -| `uncovered_test_cases` | Uncovered test case paths (capped at `max_uncovered`) | -| `save_error` | Present only if the results file could not be written | -| `plan_integrity_warnings` | Present when any plan or suite directory is missing a `.planitem` file — test instances in those directories are silently invisible to the Provar runner | +| Field | Description | +| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `run_id` | Stable 
identifier for this run (only present when `save_results: true`). Pass as `baseline_run_id` in the next call to receive only new/resolved violations. | +| `completeness_score` | Ratio of valid test cases to total (`0.0`–`1.0`). | +| `recommended_next_action` | `"stop"`, `"continue"`, or `"escalate"` — see [Quality scores explained](#quality-scores-explained). | +| `quality_score` | Project quality score (0–100) | +| `coverage_percent` | Percentage of test cases covered by at least one plan | +| `violation_summary` | Map of `rule_id → count` for all violations found | +| `plan_scores` | Array of `{ name, quality_score }` per plan | +| `uncovered_test_cases` | Uncovered test case paths (capped at `max_uncovered`) | +| `save_error` | Present only if the results file could not be written | +| `plan_integrity_warnings` | Present when any plan or suite directory is missing a `.planitem` file — test instances in those directories are silently invisible to the Provar runner | When `include_plan_details: true`, the response additionally includes full `test_plans[]` with nested suite and per-test-case data. @@ -848,7 +953,7 @@ When `include_plan_details: true`, the response additionally includes full `test **Violation rule IDs:** PROJ-EMPTY-001, PROJ-DUP-001, PROJ-DUP-002, PROJ-CALLABLE-001, PROJ-CALLABLE-002, PROJ-CONN-001, PROJ-ENV-001, PROJ-ENV-002, PROJ-SECRET-001 -**Error codes:** `NOT_A_PROJECT`, `AMBIGUOUS_PROJECT`, `PATH_NOT_FOUND`, `PATH_NOT_ALLOWED`, `PATH_TRAVERSAL` +**Error codes:** `NOT_A_PROJECT`, `AMBIGUOUS_PROJECT`, `PATH_NOT_FOUND`, `PATH_NOT_ALLOWED`, `PATH_TRAVERSAL`, `BASELINE_NOT_FOUND` (baseline run not found — run without `baseline_run_id` first to establish a baseline) --- @@ -1143,12 +1248,14 @@ Displays information about the currently connected Quality Hub org. 
Invokes `sf **Input** -| Parameter | Type | Required | Description | -| ------------ | -------- | -------- | ------------------------------------------ | -| `target_org` | string | no | SF CLI org alias (uses default if omitted) | -| `flags` | string[] | no | Additional raw CLI flags | +| Parameter | Type | Required | Description | +| ------------ | --------------------------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `target_org` | string | no | SF CLI org alias (uses default if omitted) | +| `flags` | string[] | no | Additional raw CLI flags | +| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"` returns only `requestId` and `exitCode`. `"standard"` (default) returns `requestId`, `exitCode`, `stdout`, and `stderr`. | +| `fields` | string | no | Comma-separated response keys to retain (e.g. `"exitCode,stdout"`). Unknown fields are silently ignored. Applied after the `detail` filter. | -**Output** — `{ requestId, exitCode, stdout, stderr }` +**Output** — `{ requestId, exitCode, stdout, stderr }`. Use `detail="summary"` to reduce to `{ requestId, exitCode }` only, or pass `fields` to select specific keys. --- @@ -1213,12 +1320,14 @@ Retrieves test cases from Quality Hub by user story or metadata component. Invok **Input** -| Parameter | Type | Required | Description | -| ------------ | -------- | -------- | ------------------------------------------------------------------------------------ | -| `target_org` | string | yes | SF CLI org alias or username | -| `flags` | string[] | no | Additional raw CLI flags (e.g. 
`["--issues", "US-123", "--test-project", "MyProj"]`) | +| Parameter | Type | Required | Description | +| ------------ | --------------------------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `target_org` | string | yes | SF CLI org alias or username | +| `flags` | string[] | no | Additional raw CLI flags (e.g. `["--issues", "US-123", "--test-project", "MyProj"]`) | +| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"` returns only `requestId` and `exitCode`. `"standard"` (default) returns `requestId`, `exitCode`, `stdout`, and `stderr`. | +| `fields` | string | no | Comma-separated response keys to retain (e.g. `"exitCode,stdout"`). Unknown fields are silently ignored. Applied after the `detail` filter. | -**Output** — `{ requestId, exitCode, stdout, stderr }` +**Output** — `{ requestId, exitCode, stdout, stderr }`. Use `detail="summary"` to reduce to `{ requestId, exitCode }` only, or pass `fields` to select specific keys. **Error codes:** `QH_RETRIEVE_FAILED`, `SF_NOT_FOUND` @@ -1452,6 +1561,10 @@ Salesforce DML error categories (`SALESFORCE_*`) represent test-data failures Atomically add or remove a single step (``) in a Provar XML test case file. Writes a `.bak` backup before mutating, runs structural validation after the edit, and automatically restores the backup if validation fails. +The tool's chip-level `title` — `Amend Existing Test Case Step` — signals the amendment-only contract in MCP clients that render only the title (Claude Desktop tool-picker chips, Cursor audit pane, inline tool-call references). An agent that reads only the title still sees that this tool operates on an existing test case, not a new one. + +> **When to use.** This tool is for **amending** an existing, already-validated test case (single-step add, attribute fix, debug edit). 
It is **not** for constructing a test case from scratch by calling it repeatedly after a `steps: []` `provar_testcase_generate`. Building a case step-by-step via repeated `step_edit` calls produces structurally invalid test cases (dropped scenarios, flat asserts, inconsistent step types). For new test cases, pass the full step tree to `provar_testcase_generate` in a single call. + Prerequisites: the test case file must exist and be valid XML with a `` structure. | Input | Type | Required | Description | @@ -2005,7 +2118,7 @@ Version metadata for the bundled NitroX component catalog and JSON schemas. Retu } ``` -`commitSha` and `fetchedAt` are `null` when the release build could not reach the internal source (fallback catalog in use). `schemasUpdated` is `true` when both `FactComponent.schema` and `FactPackage.schema` were successfully fetched from the same internal revision and bundled into this release; `false` when the schema fetch failed and the previously committed schemas are in use; `null` when the catalog source was not generated (dev build or pre-PDX-464 release). +`commitSha` and `fetchedAt` are `null` when the release build could not reach the internal source (fallback catalog in use). `schemasUpdated` is `true` when both `FactComponent.schema` and `FactPackage.schema` were successfully fetched from the same internal revision and bundled into this release; `false` when the schema fetch failed and the previously committed schemas are in use; `null` when the catalog source was not generated (dev build or an older release that predates this metadata). --- @@ -2051,3 +2164,80 @@ provar_nitrox_patch → apply targeted edits to an existing .po.json (RFC 7 ``` > **Note:** `provar_automation_*` and `provar_qualityhub_*` tools invoke `sf` CLI subprocesses. The Salesforce CLI must be installed and in `PATH`, or pass `sf_path` pointing to the executable directly (e.g. `~/.nvm/versions/node/v22.0.0/bin/sf`). 
A missing `sf` binary returns the error code `SF_NOT_FOUND` with an installation hint. + +--- + +## Performance Tuning + +These environment variables let you control agentic-loop safety and observability without modifying tool code. + +### Agentic loop guard (`PROVAR_MCP_MAX_TOOL_DEPTH`) + +Limits the number of Provar tool calls an AI agent may make within a single MCP session before the server starts returning errors instead of results. + +``` +PROVAR_MCP_MAX_TOOL_DEPTH=30 # allow at most 30 tool calls per session (default: 50) +``` + +Once the limit is reached, every further call returns: + +```json +{ + "error": "TOOL_BUDGET_EXCEEDED", + "callsMade": 30, + "limit": 30, + "suggestion": "Summarize progress and return control to the user." +} +``` + +| Property | Value | +| --------- | -------------------------------------------------------------------------- | +| Default | `50` | +| Scope | Per MCP session (`sessionId` from the MCP SDK) | +| Exemption | `provardx_ping` is never counted or blocked | +| Memory | Sessions are tracked in-process; restarting the server resets all counters | + +The guard is designed to prevent runaway agentic loops from making hundreds of tool calls without human review. Set it lower (e.g. `10`) for tightly supervised workflows; raise it above the default for long-running automation pipelines where you trust the agent. Omitting the variable does not disable the guard — the default of `50` still applies. + +### Per-call token attribution (`PROVAR_MCP_EMIT_TOKEN_META`) + +Appends a `_meta` object to `structuredContent` on every tool response, giving observability tooling a lightweight token-cost signal per call.
+ +``` +PROVAR_MCP_EMIT_TOKEN_META=true +``` + +When enabled, `structuredContent` gains a `_meta` key: + +```json +{ + "result": "...", + "_meta": { + "tool": "provar_project_inspect", + "detailLevel": "standard", + "estimatedTokens": 412 + } +} +``` + +On `TOOL_BUDGET_EXCEEDED` errors the meta also includes the session cumulative total: + +```json +{ + "_meta": { + "tool": "provar_project_inspect", + "detailLevel": "standard", + "estimatedTokens": 38, + "sessionTotalEstimatedTokens": 8204 + } +} +``` + +| Field | Description | +| ----------------------------- | -------------------------------------------------------------------------------------------- | +| `tool` | Name of the tool that produced this response | +| `detailLevel` | Value of the `detail` argument passed by the caller (`"summary"`, `"standard"`, or `"full"`) | +| `estimatedTokens` | `ceil(len(JSON.stringify(response)) / 4)` — a rough character-to-token estimate | +| `sessionTotalEstimatedTokens` | Cumulative estimate for the session; only present on budget-exceeded errors | + +> **Implementation note:** `_meta` is intentionally placed only in `structuredContent`, never in `content[0].text`. LLM clients read `content[0].text`; including observability data there would waste tokens on every response. 
diff --git a/package.json b/package.json index ceff6e60..2a0d1730 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "@provartesting/provardx-cli", "description": "A plugin for the Salesforce CLI to orchestrate testing activities and report quality metrics to Provar Quality Hub", - "version": "1.5.0", + "version": "1.5.1", "mcpName": "io.github.ProvarTesting/provar", "license": "BSD-3-Clause", "plugins": [ @@ -158,6 +158,7 @@ "src/mcp/rules/*.json", "docs/NITROX_COMPONENT_CATALOG.md", "docs/NITROX_CATALOG_SOURCE.json", + "docs/PROVAR_TOOL_GUIDE.md", "**/tsconfig.json", "messages/**" ], @@ -180,6 +181,9 @@ }, "lint": { "command": "eslint src test --color --cache --cache-location .eslintcache", + "dependencies": [ + "lint:script-names" + ], "files": [ "src/**/*.ts", "test/**/*.ts", @@ -189,6 +193,14 @@ ], "output": [] }, + "lint:script-names": { + "command": "node scripts/lint-script-names.cjs", + "files": [ + "scripts/lint-script-names.cjs", + "scripts/**/*" + ], + "output": [] + }, "test:compile": { "command": "tsc -p \"./test\" --pretty", "files": [ diff --git a/scripts/authoring-flow-trace.cjs b/scripts/authoring-flow-trace.cjs new file mode 100644 index 00000000..06d65311 --- /dev/null +++ b/scripts/authoring-flow-trace.cjs @@ -0,0 +1,251 @@ +// Authoring-flow trace. +// +// Drives the MCP server over JSON-RPC stdio and captures the EXACT bytes that +// an MCP client (Claude Desktop / Cursor / etc.) would surface to its LLM at +// every decision point in the test-authoring flow: +// +// 1. The orchestration prompt the LLM reads when planning ("I want to author a new test case") +// 2. The tool-guide resource the LLM reads when picking the right tool +// 3. The provar_testcase_generate tool description the LLM reads at the call site +// 4. The provar_testcase_step_edit tool description (amend-only contract) +// 5. 
/**
 * Return the slice of `text` that starts at the first match of `headerRegex`
 * and stops just before the next match of `nextHeaderRegex`.
 *
 * The search for the next header begins immediately after the text the header
 * pattern actually matched, so the terminator cannot match inside the header
 * itself and cannot skip past a header that starts right after it.
 *
 * @param {string} text - Document to excerpt.
 * @param {RegExp} headerRegex - Locates the start of the wanted section.
 * @param {RegExp} nextHeaderRegex - Locates the start of the following section.
 * @returns {string} The section from its header up to (not including) the next
 *   header, or up to the end of `text` when no later header exists. Returns an
 *   empty string when `headerRegex` does not match.
 */
function extractSection(text, headerRegex, nextHeaderRegex) {
  const startMatch = headerRegex.exec(text);
  // NOTE(review): the not-found placeholder is empty in the reviewed view of
  // this file — confirm against the committed script before relying on it.
  if (!startMatch) return '';

  const tail = text.slice(startMatch.index);
  // Offset by the length of the matched text. `headerRegex.source.length` is
  // the length of the *pattern*, which differs from the match whenever the
  // pattern contains escapes, groups, lookaheads, or quantifiers.
  const headerLength = startMatch[0].length;
  const endMatch = nextHeaderRegex.exec(tail.slice(headerLength));
  return endMatch ? tail.slice(0, headerLength + endMatch.index) : tail;
}
The provar_testcase_generate tool description ────────────────────── + divider('TRACE 3 — what the LLM reads at the call site of provar_testcase_generate'); + console.log('Tool call simulated: tools/list (filtered to provar_testcase_generate)'); + console.log('First 1000 chars of the description string surfaced to the model.\n'); + + const tools = await rpc('tools/list', {}); + const toolList = tools.result?.tools ?? []; + const gen = toolList.find((t) => t.name === 'provar_testcase_generate'); + console.log( + indent( + (gen?.description ?? '').slice(0, 1000) + (gen?.description?.length > 1000 ? '… (truncated)' : '') + ) + ); + + subdivider('steps[] field description (read by the LLM when filling the argument)'); + const stepsField = gen?.inputSchema?.properties?.steps; + console.log(indent(stepsField?.description ?? '')); + + // ── 4. The provar_testcase_step_edit tool description ───────────────────── + divider('TRACE 4 — what the LLM reads at the call site of provar_testcase_step_edit'); + console.log('Tool call simulated: tools/list (filtered to provar_testcase_step_edit)\n'); + + const edit = toolList.find((t) => t.name === 'provar_testcase_step_edit'); + console.log( + indent( + (edit?.description ?? '').slice(0, 1000) + (edit?.description?.length > 1000 ? '… (truncated)' : '') + ) + ); + + // ── 5. Real tool call — multi-scenario single-call generate ─────────────── + divider('TRACE 5 — real tool call: provar_testcase_generate with a 3-scenario payload'); + console.log("Tool call simulated: an LLM that follows TRACE 1-3's guidance constructs"); + console.log('the full step tree and passes it in ONE call. 
We capture the output:\n'); + + const callResult = await rpc('tools/call', { + name: 'provar_testcase_generate', + arguments: { + // eslint-disable-next-line camelcase + test_case_name: 'AccountFlow', + steps: [ + // Scenario 1 — Create Account + { api_id: 'UiConnect', name: 'Salesforce Connect: AdminOauth', attributes: {} }, + { + api_id: 'SetValues', + name: 'Set Account Test Data', + attributes: { AccountName: 'Acme', AccountPhone: '555-0100' }, + }, + { api_id: 'UiNavigate', name: 'Scenario 1 - When: navigate to Account home', attributes: {} }, + { api_id: 'UiDoAction', name: 'Scenario 1 - When: click New', attributes: {} }, + { + api_id: 'SetValues', + name: 'Scenario 1 - When: fill form', + attributes: { Name: '{AccountName}', Phone: '{AccountPhone}' }, + }, + { api_id: 'UiDoAction', name: 'Scenario 1 - When: click Save', attributes: {} }, + // Scenario 2 — Verify on list view (the scenario that went missing on 1.5.0) + { api_id: 'UiNavigate', name: 'Scenario 2 - Then: go to Account list', attributes: {} }, + { + api_id: 'AssertValues', + name: 'Scenario 2 - Then: assert Name on list', + attributes: { expectedValue: '{AccountName}', actualValue: 'Name', comparisonType: 'EqualTo' }, + }, + { + api_id: 'AssertValues', + name: 'Scenario 2 - Then: assert Phone on list', + attributes: { expectedValue: '{AccountPhone}', actualValue: 'Phone', comparisonType: 'EqualTo' }, + }, + // Scenario 3 — Open detail and assert all + { api_id: 'UiDoAction', name: 'Scenario 3 - When: open Account detail', attributes: {} }, + { + api_id: 'AssertValues', + name: 'Scenario 3 - Then: assert Name on detail', + attributes: { expectedValue: '{AccountName}', actualValue: 'Name', comparisonType: 'EqualTo' }, + }, + { + api_id: 'AssertValues', + name: 'Scenario 3 - Then: assert Phone on detail', + attributes: { expectedValue: '{AccountPhone}', actualValue: 'Phone', comparisonType: 'EqualTo' }, + }, + ], + dry_run: true, + overwrite: false, + }, + }); + + const content = 
callResult.result?.content?.[0]?.text ?? '{}'; + const body = JSON.parse(content); + + subdivider('Tool response — top-level fields'); + console.log(indent(`step_count: ${body.step_count}`)); + console.log(indent(`written: ${body.written}`)); + console.log(indent(`is_valid: ${body.validation?.is_valid}`)); + console.log(indent(`validity: ${body.validation?.validity_score}`)); + console.log(indent(`quality: ${body.validation?.quality_score}`)); + console.log(indent(`errors: ${body.validation?.error_count}`)); + + subdivider('Generated XML — assertions a reviewer can run by eye'); + const xml = body.xml_content; + + const checks = [ + [ + 'Sequential testItemIds 1..12, no gaps', + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12].every((n) => xml.includes(`testItemId="${n}"`)), + ], + ['No spurious testItemId="13"', !xml.includes('testItemId="13"')], + ['Scenario 1 - When marker present', xml.includes('Scenario 1 - When: navigate to Account home')], + ['Scenario 2 - Then marker present (the one 1.5.0 dropped)', xml.includes('Scenario 2 - Then: go to Account list')], + ['Scenario 3 - When marker present', xml.includes('Scenario 3 - When: open Account detail')], + ['All 4 AssertValues steps emitted', (xml.match(/AssertValues/g) ?? []).length >= 4], + ['No silent UiAssert substitution', !xml.includes('com.provar.plugins.forcedotcom.core.ui.UiAssert')], + ['{VarName} placeholders emit class="variable"', xml.includes('class="variable"')], + ]; + for (const [label, ok] of checks) { + console.log(indent(`${ok ? 
/**
 * Send one JSON-RPC 2.0 request to the spawned server over its stdin and
 * return a promise for the matching response message.
 *
 * The promise's resolver is parked in the module-level `pending` map under the
 * request id; whoever receives the response is expected to look it up there
 * and call it with the parsed message. If the entry is still parked after
 * 5 seconds it is removed and the promise rejects.
 *
 * @param {string} method - JSON-RPC method name.
 * @param {object} [params] - JSON-RPC params; omitted from the wire payload when undefined.
 * @returns {Promise<object>} Resolves with the response message; rejects with
 *   `Error('Timeout waiting for <method>')` on timeout.
 */
function rpc(method, params) {
  const requestId = nextId;
  nextId += 1;
  // One request per line: the server speaks newline-delimited JSON.
  const payload = `${JSON.stringify({ jsonrpc: '2.0', id: requestId, method, params })}\n`;

  const expire = (fail) => {
    // `delete` reports whether the entry was still parked, i.e. unanswered.
    if (!pending.delete(requestId)) return;
    fail(new Error(`Timeout waiting for ${method}`));
  };

  return new Promise((resolve, reject) => {
    pending.set(requestId, resolve);
    setTimeout(() => expire(reject), 5000);
    server.stdin.write(payload);
  });
}
`STILL CONFLATED — fix incomplete` + : `split confirmed` + ); + + // Tool-guide resource must serve content; LLMs read it when picking a tool. + const guide = await rpc('resources/read', { uri: 'provar://docs/tool-guide' }); + const gcontent = guide.result?.contents?.[0]?.text ?? ''; + record( + 'tool-guide resource is reachable', + gcontent.length > 0, + gcontent.length > 0 ? `received ${gcontent.length} chars` : `not served` + ); + record( + 'tool-guide author-test section recommends single call', + gcontent.includes('single call') || gcontent.includes('one payload'), + gcontent.includes('single call') || gcontent.includes('one payload') + ? `recommended phrasing found` + : `MISSING canonical phrasing in resource` + ); + record( + 'tool-guide author-test section excludes "repeat per step"', + !gcontent.includes('repeat per step'), + gcontent.includes('repeat per step') ? `STILL PRESENT — regression risk` : `removed` + ); + + let pass = 0; + let fail = 0; + for (const r of results) { + console.log(`${r.ok ? '[PASS]' : '[FAIL]'} ${r.label} — ${r.detail}`); + if (r.ok) { + pass++; + } else { + fail++; + } + } + console.log(`\nAuthoring-guidance validation: ${pass} passed, ${fail} failed`); + + server.stdin.end(); + process.exit(fail > 0 ? 1 : 0); +})().catch((err) => { + console.error('Validation script error:', err); + server.kill(); + process.exit(2); +}); diff --git a/scripts/construction-contract-validate.cjs b/scripts/construction-contract-validate.cjs new file mode 100644 index 00000000..ee2f89cd --- /dev/null +++ b/scripts/construction-contract-validate.cjs @@ -0,0 +1,464 @@ +// Construction-contract validation: confirm the construct/amend contract is +// reachable at every MCP protocol surface the LLM sees, and that the runtime +// guard rejects the multi-call construction shape. +// +// Description-contract pass (standard + compact schema modes): assertions on +// tools/list description bodies — every byte the LLM literally sees at the +// call site. 
Compact mode coverage is critical because +// PROVAR_MCP_SCHEMA_MODE=compact swaps the description for a short one-liner; +// if the contract isn't in that form, compact mode becomes a regression vector. +// +// Runtime-guard pass: drives a real tools/call with the rejected shape +// (steps:[] + dry_run:false + output_path) and asserts the response is a +// structured STEPS_REQUIRED error with a non-empty details.suggestion. This +// catches a regression that the description-pass assertions cannot reach: the +// passive contract surviving in the description while the active guard silently +// regresses (e.g. a refactor reorders the handler so writes happen before the +// check). +// +// Title-contract pass: assertions on the `title:` field that some clients +// render exclusively (Claude Desktop chips, Cursor audit pane, inline tool-call +// refs). Titles are schema-mode-independent but we assert in both passes to +// surface drift early either way. +// +// yarn compile +// node scripts/construction-contract-validate.cjs + +'use strict'; + +const fs = require('fs'); +const { spawn } = require('child_process'); +const os = require('os'); +const path = require('path'); + +const TMP = os.tmpdir(); +const entry = path.resolve(__dirname, '..', 'bin', 'mcp-start.js'); + +/** + * Spawn an MCP server in the given schema mode and run a set of assertions + * against tools/list. Returns the list of results. + * + * @param {string} mode - human-readable label, e.g. 
"standard" or "compact" + * @param {Record} extraEnv - env vars to merge into spawn env + * @param {(toolList: Array, record: (label: string, ok: boolean, detail: string) => void) => void} runAssertions + */ +function runValidation(mode, extraEnv, runAssertions) { + return new Promise((resolve, reject) => { + const server = spawn(process.execPath, [entry, 'mcp', 'start', '--allowed-paths', TMP, '--no-update-check'], { + stdio: ['pipe', 'pipe', 'inherit'], + env: { ...process.env, ...extraEnv }, + }); + + let nextId = 1; + const pending = new Map(); + let buf = ''; + + server.stdout.on('data', (chunk) => { + buf += chunk.toString('utf-8'); + let nl; + while ((nl = buf.indexOf('\n')) !== -1) { + const line = buf.slice(0, nl).trim(); + buf = buf.slice(nl + 1); + if (!line) continue; + try { + const msg = JSON.parse(line); + const cb = pending.get(msg.id); + if (cb) { + pending.delete(msg.id); + cb(msg); + } + } catch { + /* ignore */ + } + } + }); + + const rpc = (method, params) => { + const id = nextId++; + const req = JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n'; + return new Promise((rpcResolve, rpcReject) => { + pending.set(id, rpcResolve); + setTimeout(() => { + if (pending.has(id)) { + pending.delete(id); + rpcReject(new Error(`Timeout waiting for ${method}`)); + } + }, 10000); + server.stdin.write(req); + }); + }; + + const modeResults = []; + const record = (label, ok, detail) => { + modeResults.push({ label: `[${mode}] ${label}`, ok, detail }); + }; + + (async () => { + await rpc('initialize', { + protocolVersion: '2024-11-05', + capabilities: {}, + clientInfo: { name: 'construction-contract-validate', version: '1.0.0' }, + }); + const tools = await rpc('tools/list', {}); + const toolList = tools.result?.tools ?? 
[]; + runAssertions(toolList, record); + server.stdin.end(); + resolve(modeResults); + })().catch((err) => { + server.kill(); + reject(err); + }); + }); +} + +// ── Title-level construct-vs-amend contract ──────────────────────────────── +// The `title:` field is independent of schema mode, but we assert it in both +// passes to catch drift early regardless of which mode a future refactor breaks. +// Many MCP clients render only the title field in tool-picker chips, so the +// contract must survive at that surface too. +function titleAssertions(toolList, record) { + const gen = toolList.find((t) => t.name === 'provar_testcase_generate'); + if (!gen) { + record('provar_testcase_generate has a title', false, 'tool not found'); + } else { + const t = gen.title ?? ''; + record( + 'generate.title carries "one call" or "single call"', + t.includes('one call') || t.includes('single call'), + `title: ${JSON.stringify(t)}` + ); + record('generate.title mentions steps', /step/i.test(t), 'chip-level payload shape must be visible'); + record('generate.title length ≤ 50 chars', t.length <= 50, `length: ${t.length}`); + } + + const edit = toolList.find((t) => t.name === 'provar_testcase_step_edit'); + if (!edit) { + record('provar_testcase_step_edit has a title', false, 'tool not found'); + } else { + const t = edit.title ?? 
''; + record('step_edit.title contains "Amend" or "amendment"', /amend/i.test(t), `title: ${JSON.stringify(t)}`); + record( + 'step_edit.title signals "existing" test case only', + /exist/i.test(t), + 'chip-level signal that this tool does not construct new cases' + ); + record('step_edit.title length ≤ 50 chars', t.length <= 50, `length: ${t.length}`); + } +} + +// ── Assertions for standard mode (full TOOL_DESCRIPTION) ──────────────────── +function standardAssertions(toolList, record) { + const gen = toolList.find((t) => t.name === 'provar_testcase_generate'); + if (!gen) { + record('provar_testcase_generate is registered', false, 'tool not found'); + } else { + const d = gen.description ?? ''; + record( + 'generate.description leads with "Construction pattern"', + /^[^.]*Construction pattern/.test(d), + d.slice(0, 80) + ); + record( + 'generate.description contains "single call"', + d.includes('single call'), + 'protects against the multi-call construction regression at call site' + ); + record( + 'generate.description contains "FULL step tree"', + d.includes('FULL step tree'), + 'instructs full payload in one call' + ); + record( + 'generate.description contains "AMENDING"', + d.includes('AMENDING'), + 'marks step_edit as amendment-only at the generate call site' + ); + record( + 'generate.description rejects CONSTRUCTING via step_edit', + // Literal substring (not regex) — a regex match would false-positive on + // hostile rewordings like "constructing...not via generate". 
+ d.includes('not for CONSTRUCTING one from scratch'), + 'literal canonical phrase: "not for CONSTRUCTING one from scratch"' + ); + record( + 'generate.description: contract appears in the first 200 chars', + d.indexOf('Construction pattern') >= 0 && d.indexOf('Construction pattern') < 200, + `position: ${d.indexOf( + 'Construction pattern' + )} (LLMs weight leading tokens more; truncating clients cut at ~1024)` + ); + record( + 'generate.description gives stop-and-assemble guidance', + d.includes('stop and assemble') || d.includes('stop, and assemble'), + 'tells agents what to do when they catch themselves in the multi-call pattern' + ); + + const stepsField = gen.inputSchema?.properties?.steps; + const fd = stepsField?.description ?? ''; + record( + 'generate.steps.description contains "COMPLETE step tree"', + fd.includes('COMPLETE step tree'), + 'field-level contract' + ); + record( + 'generate.steps.description contains "single call"', + fd.includes('single call'), + 'field-level single-call reminder' + ); + record( + 'generate.steps.description warns about amendments-only step_edit', + fd.includes('amendments only') || fd.includes('for amendments'), + 'field-level amend-only warning' + ); + } + + const edit = toolList.find((t) => t.name === 'provar_testcase_step_edit'); + if (!edit) { + record('provar_testcase_step_edit is registered', false, 'tool not found'); + } else { + const d = edit.description ?? 
''; + record( + 'step_edit.description self-identifies as AMENDMENT-ONLY', + d.includes('AMENDMENT-ONLY') || d.includes('AMENDING'), + 'lead-in framing the LLM reads first' + ); + record( + 'step_edit.description rejects construct-from-scratch usage', + d.includes('NOT for constructing') || d.includes('not for constructing'), + 'explicit rejection at call site' + ); + record( + 'step_edit.description points at provar_testcase_generate for new test cases', + d.includes('provar_testcase_generate'), + 'tells LLM where to go instead' + ); + record( + 'step_edit.description spells out the structural defects from misuse', + d.includes('dropped scenarios') || d.includes('flat asserts') || d.includes('inconsistent step types'), + 'consequence is explicit so the contract is judgement-friendly' + ); + } + + // Title-level contract — runs in both modes to surface drift. + titleAssertions(toolList, record); +} + +// ── Assertions for compact mode (short one-liner) ─────────────────────────── +// The compact form must STILL carry the contract or PROVAR_MCP_SCHEMA_MODE=compact +// becomes a regression highway (the standard description is swapped out entirely). +function compactAssertions(toolList, record) { + const gen = toolList.find((t) => t.name === 'provar_testcase_generate'); + if (!gen) { + record('provar_testcase_generate is registered', false, 'tool not found'); + } else { + const d = gen.description ?? 
''; + record( + 'compact generate.description carries single-call contract', + d.includes('ONE call'), + 'must mention "ONE call" so contract is visible even when the standard form is stripped' + ); + record( + 'compact generate.description carries FULL steps[] tree contract', + d.includes('FULL steps'), + 'must mention FULL steps[] in the compact form' + ); + record( + 'compact generate.description carries AMENDING vs CONSTRUCTING framing', + d.includes('AMENDING') && d.includes('CONSTRUCTING'), + 'must split AMENDING (step_edit) vs CONSTRUCTING (generate) in the compact form' + ); + record( + 'compact generate.description does NOT regress to a contract-free one-liner', + !/^Generate a Provar XML test case skeleton with UUID guids and steps structure\.?$/.test(d), + 'old compact form must be replaced' + ); + } + + const edit = toolList.find((t) => t.name === 'provar_testcase_step_edit'); + if (!edit) { + record('provar_testcase_step_edit is registered', false, 'tool not found'); + } else { + const d = edit.description ?? ''; + record( + 'compact step_edit.description self-identifies as AMENDMENT-ONLY', + d.includes('AMENDMENT-ONLY') || d.includes('amendment') || d.includes('AMENDING'), + 'amendment framing must survive compact mode' + ); + record( + 'compact step_edit.description rejects construct-from-scratch usage', + d.includes('not for constructing') || d.includes('NOT for constructing') || d.includes('not for CONSTRUCTING'), + 'rejection must survive compact mode' + ); + } + + // Title-level contract — runs in both modes to surface drift. + titleAssertions(toolList, record); +} + +// ── Runtime guard: tools/call assertion ───────────────────────────────────── +// Drives a real tools/call(provar_testcase_generate, ...) with the rejected +// shape (steps:[] + dry_run:false + output_path) and asserts the response is +// a structured STEPS_REQUIRED error. 
This is the only check that catches a +// silent regression where the passive description survives but the active +// runtime guard is removed or reordered after a side effect. +function runRuntimeGuardValidation() { + return new Promise((resolve, reject) => { + const server = spawn(process.execPath, [entry, 'mcp', 'start', '--allowed-paths', TMP, '--no-update-check'], { + stdio: ['pipe', 'pipe', 'inherit'], + env: { ...process.env }, + }); + + let nextId = 1; + const pending = new Map(); + let buf = ''; + + server.stdout.on('data', (chunk) => { + buf += chunk.toString('utf-8'); + let nl; + while ((nl = buf.indexOf('\n')) !== -1) { + const line = buf.slice(0, nl).trim(); + buf = buf.slice(nl + 1); + if (!line) continue; + try { + const msg = JSON.parse(line); + const cb = pending.get(msg.id); + if (cb) { + pending.delete(msg.id); + cb(msg); + } + } catch { + /* ignore */ + } + } + }); + + const rpc = (method, params) => { + const id = nextId++; + const req = JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n'; + return new Promise((rpcResolve, rpcReject) => { + pending.set(id, rpcResolve); + setTimeout(() => { + if (pending.has(id)) { + pending.delete(id); + rpcReject(new Error(`Timeout waiting for ${method}`)); + } + }, 10000); + server.stdin.write(req); + }); + }; + + const results = []; + const record = (label, ok, detail) => { + results.push({ label: `[runtime-guard] ${label}`, ok, detail }); + }; + + (async () => { + await rpc('initialize', { + protocolVersion: '2024-11-05', + capabilities: {}, + clientInfo: { name: 'construction-contract-validate-runtime', version: '1.0.0' }, + }); + + // Use a unique tmp path so a leftover file from a prior run can't mask the assertion. 
+ const outPath = path.join(TMP, `construction-contract-validate-${Date.now()}.testcase`); + try { + if (fs.existsSync(outPath)) fs.unlinkSync(outPath); + } catch { + /* best-effort */ + } + + const callRes = await rpc('tools/call', { + name: 'provar_testcase_generate', + arguments: { + test_case_name: 'construction-contract validate', + steps: [], + dry_run: false, + output_path: outPath, + }, + }); + + // MCP tools/call returns { result: { content: [{ type, text }], isError? } }. + // The tool's error body is JSON-encoded in content[0].text. + const result = callRes.result; + record( + 'tools/call returned a result (no protocol-level error)', + !!result && !callRes.error, + callRes.error ? JSON.stringify(callRes.error).slice(0, 120) : 'protocol OK' + ); + record( + 'result.isError === true (tool-level rejection)', + result?.isError === true, + `isError: ${String(result?.isError)} — rejection must surface at content level` + ); + + let body = null; + try { + body = JSON.parse(result?.content?.[0]?.text ?? 
'{}'); + } catch (parseErr) { + record('content[0].text parses as JSON', false, parseErr.message); + } + record( + 'error_code === "STEPS_REQUIRED"', + body?.error_code === 'STEPS_REQUIRED', + `error_code: ${body?.error_code} — must match the documented code from docs/mcp.md` + ); + record( + 'retryable === false', + body?.retryable === false, + 'STEPS_REQUIRED is a contract violation — retrying with the same payload would never succeed' + ); + record( + 'details.suggestion is a non-empty string', + typeof body?.details?.suggestion === 'string' && body.details.suggestion.length > 0, + 'details.suggestion must tell the LLM how to self-correct (canonical multi-call rejection text)' + ); + record( + 'details.suggestion mentions "FULL step tree"', + typeof body?.details?.suggestion === 'string' && body.details.suggestion.includes('FULL step tree'), + 'suggestion must point the LLM at the single-call pattern' + ); + record( + 'details.suggestion mentions dry_run=true escape hatch', + typeof body?.details?.suggestion === 'string' && body.details.suggestion.includes('dry_run=true'), + 'suggestion must mention dry_run=true for legitimate skeleton-inspection callers' + ); + record( + 'no file written at output_path (zero side effects)', + !fs.existsSync(outPath), + 'STEPS_REQUIRED must run BEFORE fs.writeFileSync — no skeleton on disk' + ); + + server.stdin.end(); + resolve(results); + })().catch((err) => { + server.kill(); + reject(err); + }); + }); +} + +(async () => { + // Standard pass: override PROVAR_MCP_SCHEMA_MODE with undefined so a value exported in the + // caller's shell cannot leak in. runValidation spreads process.env before extraEnv, and + // child_process.spawn ignores env entries whose value is undefined, so the child starts + // with the variable unset. + const standardResults = await runValidation('standard', { PROVAR_MCP_SCHEMA_MODE: undefined }, standardAssertions); + // For compact, set PROVAR_MCP_SCHEMA_MODE=compact via the spawn env. 
+ const compactResults = await runValidation('compact', { PROVAR_MCP_SCHEMA_MODE: 'compact' }, compactAssertions); + const runtimeGuardResults = await runRuntimeGuardValidation(); + + const allResults = [...standardResults, ...compactResults, ...runtimeGuardResults]; + + let pass = 0; + let fail = 0; + for (const r of allResults) { + console.log(`${r.ok ? '[PASS]' : '[FAIL]'} ${r.label} — ${r.detail}`); + if (r.ok) { + pass++; + } else { + fail++; + } + } + console.log(`\nConstruction-contract validation: ${pass} passed, ${fail} failed`); + process.exit(fail > 0 ? 1 : 0); +})().catch((err) => { + console.error('Validation script error:', err); + process.exit(2); +}); diff --git a/scripts/lint-script-names.cjs b/scripts/lint-script-names.cjs new file mode 100644 index 00000000..68a83442 --- /dev/null +++ b/scripts/lint-script-names.cjs @@ -0,0 +1,55 @@ +// Script-name lint: enforces the convention that files under scripts/ are +// named by what they DO, not by which ticket prompted them. +// +// Why: ticket-prefixed filenames anchor the codebase to internal Jira IDs, +// confuse future readers when the original ticket is closed/archived, and +// leak internal process language into customer-visible artifacts (CI logs, +// PR diffs, file trees that pilots may receive). Behaviour-named scripts +// stay readable as the codebase evolves. +// +// Rule: no file ANYWHERE under scripts/ (including nested subdirectories) +// may have a basename matching /^pdx[-_]?\d+/i. The walk is recursive so a +// nested `scripts/tmp/pdx-123.cjs` does not bypass the gate. 
+// +// Run: +// node scripts/lint-script-names.cjs +// Or via the lint chain: +// yarn lint # wireit runs lint:script-names as a dependency + +'use strict'; + +const fs = require('node:fs'); +const path = require('node:path'); + +const SCRIPTS_DIR = path.resolve(__dirname); +const TICKET_PREFIX_RE = /^pdx[-_]?\d+/i; + +/** + * Recursively collect the paths of all regular files under `dir`. + * Directories are descended into; entries that are neither a directory nor a + * regular file are ignored. + * + * @param {string} dir - directory to scan + * @returns {string[]} paths built by joining `dir` with each entry name + */ +function walk(dir) { + const out = []; + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const full = path.join(dir, entry.name); + if (entry.isDirectory()) { + out.push(...walk(full)); + } else if (entry.isFile()) { + out.push(full); + } + } + return out; +} + +// Offenders are reported relative to the parent of this script's directory, with +// backslashes replaced by forward slashes, and sorted so the output order is stable. +const offenders = walk(SCRIPTS_DIR) + .filter((full) => TICKET_PREFIX_RE.test(path.basename(full))) + .map((full) => path.relative(path.dirname(SCRIPTS_DIR), full).replace(/\\/g, '/')) + .sort(); + +if (offenders.length === 0) { + console.log('lint-script-names: OK (no ticket-prefixed script filenames under scripts/)'); + process.exit(0); +} + +console.error('lint-script-names: FAIL — scripts/ contains ticket-prefixed filenames:'); +for (const rel of offenders) console.error(` - ${rel}`); +console.error( + '\nRename each file to describe what it DOES, not which ticket added it (e.g. `authoring-flow-trace.cjs` instead of `pdx-481-trace.cjs`).' +); +process.exit(1); diff --git a/scripts/mcp-smoke.cjs b/scripts/mcp-smoke.cjs index 81f5042e..6cea444b 100644 --- a/scripts/mcp-smoke.cjs +++ b/scripts/mcp-smoke.cjs @@ -3,7 +3,11 @@ // PASS = JSON-RPC result received (tool responded; content may still contain an error code — that's fine) // FAIL = JSON-RPC error (protocol-level: unknown method, missing required arg, server crash, timeout) // -// Usage: node scripts/mcp-smoke.cjs [2>$null] +// Usage: node scripts/mcp-smoke.cjs [--profile <groups>] [2>$null] +// --profile <groups> Comma-separated list of tool groups to exercise (default: all groups). +// Group names match PROVAR_MCP_TOOLS values: nitrox, automation, qualityhub, +// validation, authoring, inspect, connection, rca. 
+// Example: node scripts/mcp-smoke.cjs --profile automation,qualityhub // Note: Run with stderr suppressed to avoid sf update warnings mixing into output. // // Env flags: @@ -22,6 +26,31 @@ const REQUEST_TIMEOUT_MS = Number(process.env['SMOKE_REQUEST_TIMEOUT_MS'] ?? 30_ const OVERALL_TIMEOUT_MS = Number(process.env['SMOKE_OVERALL_TIMEOUT_MS'] ?? 120_000); const INCLUDE_SETUP = process.env['SMOKE_INCLUDE_SETUP'] === '1'; +// --profile flag: restrict which tool groups are exercised +const profileArg = (() => { + const idx = process.argv.indexOf('--profile'); + if (idx !== -1 && process.argv[idx + 1]) return process.argv[idx + 1]; + const eq = process.argv.find((a) => a.startsWith('--profile=')); + return eq ? eq.slice('--profile='.length) : null; +})(); +const ACTIVE_GROUPS = profileArg + ? new Set( + profileArg + .split(',') + .map((g) => g.trim().toLowerCase()) + .filter(Boolean) + ) + : null; + +/** Returns true if the group should be exercised (profile includes it, or no profile set). */ +function inGroup(group) { + return ACTIVE_GROUPS === null || ACTIVE_GROUPS.has(group); +} + +if (ACTIVE_GROUPS) { + console.log(`Profile: [${[...ACTIVE_GROUPS].join(', ')}] — skipping other groups`); +} + // ---------------------------------------------------------------------------- // Server process // ---------------------------------------------------------------------------- @@ -31,6 +60,7 @@ const server = spawn('sf', ['provar', 'mcp', 'start', '--allowed-paths', TMP], { env: { ...process.env, PROVAR_DEV_WHITELIST_KEYS: process.env.PROVAR_DEV_WHITELIST_KEYS || '', + ...(ACTIVE_GROUPS ? 
{ PROVAR_MCP_TOOLS: [...ACTIVE_GROUPS].join(',') } : {}), }, }); @@ -76,7 +106,10 @@ overallTimer.unref(); // don't prevent natural exit if tests finish early // ---------------------------------------------------------------------------- // RPC helpers (with per-request timeout) // ---------------------------------------------------------------------------- +let expectedCount = 0; + function rpc(label, method, params) { + expectedCount++; return new Promise((resolve) => { const id = ++msgId; const timer = setTimeout(() => { @@ -120,204 +153,238 @@ async function runTests() { // ── 3. provar_project_inspect ───────────────────────────────────────────── // TMP has no .testproject → structured "not a Provar project" response - await callTool('provar_project_inspect', { project_path: TMP }); + if (inGroup('inspect')) await callTool('provar_project_inspect', { project_path: TMP }); // ── 4. provar_pageobject_generate (dry_run) ─────────────────────────────── - await callTool('provar_pageobject_generate', { - class_name: 'AccountDetailPage', - package_name: 'pageobjects.accounts', - page_type: 'standard', - dry_run: true, - }); + if (inGroup('authoring')) + await callTool('provar_pageobject_generate', { + class_name: 'AccountDetailPage', + package_name: 'pageobjects.accounts', + page_type: 'standard', + dry_run: true, + }); // ── 5. provar_pageobject_validate ───────────────────────────────────────── - await callTool('provar_pageobject_validate', { - content: 'public class AccountDetailPage {}', - }); + if (inGroup('validation')) + await callTool('provar_pageobject_validate', { + content: 'public class AccountDetailPage {}', + }); // ── 6. provar_testcase_generate (dry_run) ───────────────────────────────── - await callTool('provar_testcase_generate', { - test_case_name: 'Smoke Test Case', - dry_run: true, - }); + if (inGroup('authoring')) + await callTool('provar_testcase_generate', { + test_case_name: 'Smoke Test Case', + dry_run: true, + }); + + // ── 6b. 
provar_testcase_generate STEPS_REQUIRED runtime guard ──────────── + // Drives the rejected shape (steps:[] + dry_run:false + output_path) so the + // multi-call construction shape is exercised on every smoke run. The smoke + // framework counts any JSON-RPC response as PASS; the assertion that the + // body carries error_code='STEPS_REQUIRED' lives in + // scripts/construction-contract-validate.cjs. + if (inGroup('authoring')) + await callTool('provar_testcase_generate', { + test_case_name: 'STEPS_REQUIRED Guard Smoke', + steps: [], + dry_run: false, + output_path: path.join(TMP, 'steps-required-smoke-rejected.testcase'), + }); // ── 7. provar_testcase_validate ─────────────────────────────────────────── - await callTool('provar_testcase_validate', { content: '' }); + if (inGroup('validation')) await callTool('provar_testcase_validate', { content: '' }); // ── 8. provar_testsuite_validate ────────────────────────────────────────── - await callTool('provar_testsuite_validate', { suite_name: 'SmokeTestSuite' }); + if (inGroup('validation')) await callTool('provar_testsuite_validate', { suite_name: 'SmokeTestSuite' }); // ── 9. provar_testplan_validate ─────────────────────────────────────────── - await callTool('provar_testplan_validate', { plan_name: 'SmokeTestPlan' }); + if (inGroup('validation')) await callTool('provar_testplan_validate', { plan_name: 'SmokeTestPlan' }); // ── 10. provar_project_validate ─────────────────────────────────────────── // TMP is not a Provar project → PATH_NOT_FOUND or NOT_A_PROJECT result - await callTool('provar_project_validate', { project_path: TMP }); + if (inGroup('validation')) await callTool('provar_project_validate', { project_path: TMP }); // ── 11. 
provar_properties_generate (dry_run) ────────────────────────────── - await callTool('provar_properties_generate', { - output_path: path.join(TMP, 'smoke-props.json'), - dry_run: true, - }); + if (inGroup('validation')) + await callTool('provar_properties_generate', { + output_path: path.join(TMP, 'smoke-props.json'), + dry_run: true, + }); // ── 12. provar_properties_read ──────────────────────────────────────────── // Non-existent file → FILE_NOT_FOUND result - await callTool('provar_properties_read', { - file_path: path.join(TMP, 'nonexistent-props.json'), - }); + if (inGroup('validation')) + await callTool('provar_properties_read', { + file_path: path.join(TMP, 'nonexistent-props.json'), + }); // ── 13. provar_properties_set ───────────────────────────────────────────── // Non-existent file → FILE_NOT_FOUND result - await callTool('provar_properties_set', { - file_path: path.join(TMP, 'nonexistent-props.json'), - updates: { stopOnError: true }, - }); + if (inGroup('validation')) + await callTool('provar_properties_set', { + file_path: path.join(TMP, 'nonexistent-props.json'), + updates: { stopOnError: true }, + }); // ── 14. provar_properties_validate ─────────────────────────────────────── // Empty JSON → validation issues about missing required fields - await callTool('provar_properties_validate', { content: '{}' }); + if (inGroup('validation')) await callTool('provar_properties_validate', { content: '{}' }); // ── 15. provar_ant_generate (dry_run) ───────────────────────────────────── - await callTool('provar_ant_generate', { - provar_home: path.join(TMP, 'provar'), - filesets: [{ dir: '../tests' }], - dry_run: true, - }); + if (inGroup('validation')) + await callTool('provar_ant_generate', { + provar_home: path.join(TMP, 'provar'), + filesets: [{ dir: '../tests' }], + dry_run: true, + }); // ── 16. 
provar_ant_validate ─────────────────────────────────────────────── // Minimal XML — will have validation issues but not crash - await callTool('provar_ant_validate', { content: '' }); + if (inGroup('validation')) await callTool('provar_ant_validate', { content: '' }); // ── 17. provar_qualityhub_connect ───────────────────────────────────────── // No real org → SF_NOT_FOUND or auth error result - await callTool('provar_qualityhub_connect', { target_org: 'smoke-test-org' }); + if (inGroup('qualityhub')) await callTool('provar_qualityhub_connect', { target_org: 'smoke-test-org' }); // ── 18. provar_qualityhub_display ───────────────────────────────────────── - await callTool('provar_qualityhub_display', {}); + if (inGroup('qualityhub')) await callTool('provar_qualityhub_display', {}); // ── 19. provar_qualityhub_testrun ───────────────────────────────────────── - await callTool('provar_qualityhub_testrun', { target_org: 'smoke-test-org' }); + if (inGroup('qualityhub')) await callTool('provar_qualityhub_testrun', { target_org: 'smoke-test-org' }); // ── 20. provar_qualityhub_testrun_report ────────────────────────────────── - await callTool('provar_qualityhub_testrun_report', { - target_org: 'smoke-test-org', - run_id: 'fake-run-id-000', - }); + if (inGroup('qualityhub')) + await callTool('provar_qualityhub_testrun_report', { + target_org: 'smoke-test-org', + run_id: 'fake-run-id-000', + }); // ── 21. provar_qualityhub_testrun_abort ─────────────────────────────────── - await callTool('provar_qualityhub_testrun_abort', { - target_org: 'smoke-test-org', - run_id: 'fake-run-id-000', - }); + if (inGroup('qualityhub')) + await callTool('provar_qualityhub_testrun_abort', { + target_org: 'smoke-test-org', + run_id: 'fake-run-id-000', + }); // ── 22. 
provar_qualityhub_testcase_retrieve ─────────────────────────────── - await callTool('provar_qualityhub_testcase_retrieve', { target_org: 'smoke-test-org' }); + if (inGroup('qualityhub')) await callTool('provar_qualityhub_testcase_retrieve', { target_org: 'smoke-test-org' }); // ── 23. provar_qualityhub_defect_create ─────────────────────────────────── - await callTool('provar_qualityhub_defect_create', { - run_id: 'fake-run-id-000', - target_org: 'smoke-test-org', - }); + if (inGroup('qualityhub')) + await callTool('provar_qualityhub_defect_create', { + run_id: 'fake-run-id-000', + target_org: 'smoke-test-org', + }); // ── 24. provar_automation_setup ─────────────────────────────────────────── // Skipped by default: when no Provar installation is found on the CI runner, // this tool downloads the full Provar binary (~200 MB), which is a destructive // side effect in a smoke test. Enable with SMOKE_INCLUDE_SETUP=1. - if (INCLUDE_SETUP) { + if (INCLUDE_SETUP && inGroup('automation')) { await callTool('provar_automation_setup', {}); } // ── 25. provar_automation_metadata_download ─────────────────────────────── - await callTool('provar_automation_metadata_download', {}); + if (inGroup('automation')) await callTool('provar_automation_metadata_download', {}); // ── 26. provar_automation_compile ───────────────────────────────────────── - await callTool('provar_automation_compile', {}); + if (inGroup('automation')) await callTool('provar_automation_compile', {}); // ── 27. provar_automation_testrun ───────────────────────────────────────── - await callTool('provar_automation_testrun', {}); + if (inGroup('automation')) await callTool('provar_automation_testrun', {}); // ── 28. 
provar_automation_config_load ───────────────────────────────────── - await callTool('provar_automation_config_load', { - properties_path: path.join(TMP, 'nonexistent-props.json'), - }); + if (inGroup('automation')) + await callTool('provar_automation_config_load', { + properties_path: path.join(TMP, 'nonexistent-props.json'), + }); // ── 29. provar_testrun_report_locate ───────────────────────────────────── // TMP is not a Provar project → RESULTS_NOT_CONFIGURED result - await callTool('provar_testrun_report_locate', { project_path: TMP }); + if (inGroup('rca')) await callTool('provar_testrun_report_locate', { project_path: TMP }); // ── 30. provar_testrun_rca ─────────────────────────────────────────────── - await callTool('provar_testrun_rca', { project_path: TMP }); + if (inGroup('rca')) await callTool('provar_testrun_rca', { project_path: TMP }); // ── 31. provar_testplan_create ──────────────────────────────────────────── // TMP is not a Provar project → NOT_A_PROJECT result - await callTool('provar_testplan_create', { - project_path: TMP, - plan_name: 'SmokePlan', - }); + if (inGroup('authoring')) + await callTool('provar_testplan_create', { + project_path: TMP, + plan_name: 'SmokePlan', + }); // ── 32. provar_testplan_add-instance ───────────────────────────────────── // TMP is not a Provar project → NOT_A_PROJECT result - await callTool('provar_testplan_add-instance', { - project_path: TMP, - test_case_path: 'tests/Smoke/SmokeTest.testcase', - plan_name: 'SmokePlan', - }); + if (inGroup('authoring')) + await callTool('provar_testplan_add-instance', { + project_path: TMP, + test_case_path: 'tests/Smoke/SmokeTest.testcase', + plan_name: 'SmokePlan', + }); // ── 33. 
provar_testplan_create-suite ───────────────────────────────────── - await callTool('provar_testplan_create-suite', { - project_path: TMP, - plan_name: 'SmokePlan', - suite_name: 'SmokeSuite', - }); + if (inGroup('authoring')) + await callTool('provar_testplan_create-suite', { + project_path: TMP, + plan_name: 'SmokePlan', + suite_name: 'SmokeSuite', + }); // ── 34. provar_testplan_remove-instance ────────────────────────────────── - await callTool('provar_testplan_remove-instance', { - project_path: TMP, - instance_path: 'plans/SmokePlan/SmokeSuite/smoke.testinstance', - }); + if (inGroup('authoring')) + await callTool('provar_testplan_remove-instance', { + project_path: TMP, + instance_path: 'plans/SmokePlan/SmokeSuite/smoke.testinstance', + }); // ── 35. provar_nitrox_discover ──────────────────────────────────────────── // TMP has no .testproject → empty projects list, no crash - await callTool('provar_nitrox_discover', { search_roots: [TMP] }); + if (inGroup('nitrox')) await callTool('provar_nitrox_discover', { search_roots: [TMP] }); // ── 36. provar_nitrox_validate ──────────────────────────────────────────── // Minimal valid root component → score 100 - await callTool('provar_nitrox_validate', { - content: JSON.stringify({ - componentId: '550e8400-e29b-41d4-a716-446655440000', - name: '/com/smoke/SmokeComponent', - type: 'Block', - pageStructureElement: true, - fieldDetailsElement: false, - }), - }); + if (inGroup('nitrox')) + await callTool('provar_nitrox_validate', { + content: JSON.stringify({ + componentId: '550e8400-e29b-41d4-a716-446655440000', + name: '/com/smoke/SmokeComponent', + type: 'Block', + pageStructureElement: true, + fieldDetailsElement: false, + }), + }); // ── 36. 
provar_nitrox_generate (dry_run) ───────────────────────────────── - await callTool('provar_nitrox_generate', { - name: '/com/smoke/SmokeComponent', - tag_name: 'c-smoke', - dry_run: true, - }); + if (inGroup('nitrox')) + await callTool('provar_nitrox_generate', { + name: '/com/smoke/SmokeComponent', + tag_name: 'c-smoke', + dry_run: true, + }); // ── 37. provar_nitrox_read ──────────────────────────────────────────────── // Non-existent file → FILE_NOT_FOUND result (not a protocol error) - await callTool('provar_nitrox_read', { - file_paths: [path.join(TMP, 'nonexistent.po.json')], - }); + if (inGroup('nitrox')) + await callTool('provar_nitrox_read', { + file_paths: [path.join(TMP, 'nonexistent.po.json')], + }); // ── 38. provar_nitrox_patch ─────────────────────────────────────────────── // Non-existent file → FILE_NOT_FOUND result (not a protocol error) - await callTool('provar_nitrox_patch', { - file_path: path.join(TMP, 'nonexistent.po.json'), - patch: { name: '/com/smoke/Patched' }, - }); + if (inGroup('nitrox')) + await callTool('provar_nitrox_patch', { + file_path: path.join(TMP, 'nonexistent.po.json'), + patch: { name: '/com/smoke/Patched' }, + }); // ── 39. provar_qualityhub_examples_retrieve ─────────────────────────────── // No API key in CI → graceful degrade with warning, empty examples (isError: false) - await callTool('provar_qualityhub_examples_retrieve', { - query: 'As a sales rep I want to create an Opportunity in Salesforce', - n: 3, - }); + if (inGroup('qualityhub')) + await callTool('provar_qualityhub_examples_retrieve', { + query: 'As a sales rep I want to create an Opportunity in Salesforce', + n: 3, + }); // ── 40. prompts/list ────────────────────────────────────────────────────── await send('prompts/list', {}); @@ -383,15 +450,16 @@ async function runTests() { // ── 52. 
provar_connection_list ──────────────────────────────────────────── // TMP has no .testproject → CONNECTION_FILE_NOT_FOUND result (not a protocol error) - await callTool('provar_connection_list', { project_path: TMP }); + if (inGroup('connection')) await callTool('provar_connection_list', { project_path: TMP }); // ── 53. provar_testcase_step_edit ───────────────────────────────────────── // TMP/nonexistent.testcase does not exist → FILE_NOT_FOUND result - await callTool('provar_testcase_step_edit', { - test_case_path: path.join(TMP, 'nonexistent.testcase'), - mode: 'remove', - test_item_id: '1', - }); + if (inGroup('authoring')) + await callTool('provar_testcase_step_edit', { + test_case_path: path.join(TMP, 'nonexistent.testcase'), + mode: 'remove', + test_item_id: '1', + }); server.stdin.end(); } @@ -401,8 +469,7 @@ async function runTests() { // ---------------------------------------------------------------------------- server.on('close', () => { clearTimeout(overallTimer); - // initialize + tools/list + 40 tools + prompts/list + 11 prompts/get (setup excluded from default count) - const TOTAL_EXPECTED = 54 + (INCLUDE_SETUP ? 1 : 0); + const TOTAL_EXPECTED = expectedCount; let passed = 0; let failed = 0; diff --git a/scripts/token-measure-vs-playwright.cjs b/scripts/token-measure-vs-playwright.cjs new file mode 100644 index 00000000..b4426e43 --- /dev/null +++ b/scripts/token-measure-vs-playwright.cjs @@ -0,0 +1,245 @@ +// Apples-to-apples token measurement: Provar MCP vs. Playwright MCP. +// +// Both servers expose tools/list via JSON-RPC stdio. This script drives each +// server with identical methodology — initialize → tools/list — and reports +// the catalog size (characters, approximate tokens at chars/4) plus a per-tool +// breakdown for the heaviest items. +// +// For Playwright MCP we additionally measure a representative tools/call: +// browser_snapshot on a sample page. 
That's the per-interaction cost that +// dominates Playwright MCP's 114K-per-test figure. +// +// node scripts/token-measure-vs-playwright.cjs + +'use strict'; + +const { spawn } = require('child_process'); +const os = require('os'); +const path = require('path'); + +const TMP = os.tmpdir(); +const PROVAR_ENTRY = path.resolve(__dirname, '..', 'bin', 'mcp-start.js'); + +// ── Generic JSON-RPC stdio driver ─────────────────────────────────────────── + +function driveServer(name, command, args, env, onConnect) { + return new Promise((resolve, reject) => { + const server = spawn(command, args, { + stdio: ['pipe', 'pipe', 'pipe'], + env: { ...process.env, ...env }, + shell: process.platform === 'win32', + }); + + let nextId = 1; + const pending = new Map(); + let buf = ''; + let stderrBuf = ''; + + server.stderr.on('data', (chunk) => { + stderrBuf += chunk.toString('utf-8'); + }); + + server.stdout.on('data', (chunk) => { + buf += chunk.toString('utf-8'); + let nl; + while ((nl = buf.indexOf('\n')) !== -1) { + const line = buf.slice(0, nl).trim(); + buf = buf.slice(nl + 1); + if (!line) continue; + try { + const msg = JSON.parse(line); + const cb = pending.get(msg.id); + if (cb) { + pending.delete(msg.id); + cb(msg); + } + } catch { + /* non-JSON output — ignore */ + } + } + }); + + server.on('error', (err) => { + reject(new Error(`${name} spawn error: ${err.message}`)); + }); + + const rpc = (method, params, timeoutMs = 30000) => { + const id = nextId++; + const req = JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n'; + return new Promise((rpcResolve, rpcReject) => { + pending.set(id, rpcResolve); + setTimeout(() => { + if (pending.has(id)) { + pending.delete(id); + rpcReject(new Error(`Timeout (${timeoutMs}ms) waiting for ${method} on ${name}`)); + } + }, timeoutMs); + server.stdin.write(req); + }); + }; + + (async () => { + try { + const result = await onConnect(rpc); + server.stdin.end(); + // Allow a brief grace period for shutdown + setTimeout(() => 
server.kill(), 500); + resolve({ ...result, stderr: stderrBuf }); + } catch (err) { + server.kill(); + reject(err); + } + })(); + }); +} + +// ── Helpers ───────────────────────────────────────────────────────────────── + +function tokenize(jsonValue) { + const s = JSON.stringify(jsonValue); + return { + chars: s.length, + tokens: Math.round(s.length / 4), + }; +} + +function reportCatalog(name, toolArr) { + const { chars, tokens } = tokenize(toolArr); + const perTool = toolArr.map((t) => { + const sz = tokenize(t); + return { name: t.name, ...sz, descChars: (t.description ?? '').length }; + }); + perTool.sort((a, b) => b.tokens - a.tokens); + return { + name, + toolCount: toolArr.length, + catalogChars: chars, + catalogTokens: tokens, + meanTokens: Math.round(tokens / Math.max(toolArr.length, 1)), + topTools: perTool.slice(0, 5), + }; +} + +// ── Provar MCP runner ─────────────────────────────────────────────────────── + +async function measureProvar(label, env) { + return driveServer( + `Provar MCP [${label}]`, + process.execPath, + [PROVAR_ENTRY, 'mcp', 'start', '--allowed-paths', TMP, '--no-update-check'], + env, + async (rpc) => { + await rpc('initialize', { + protocolVersion: '2024-11-05', + capabilities: {}, + clientInfo: { name: 'token-compare', version: '1.0.0' }, + }); + const tools = await rpc('tools/list', {}); + return reportCatalog(`Provar MCP — ${label}`, tools.result?.tools ?? []); + } + ); +} + +// ── Playwright MCP runner ─────────────────────────────────────────────────── + +async function measurePlaywright(label, extraArgs = []) { + return driveServer(`Playwright MCP [${label}]`, 'npx', ['-y', '@playwright/mcp', ...extraArgs], {}, async (rpc) => { + await rpc('initialize', { + protocolVersion: '2024-11-05', + capabilities: {}, + clientInfo: { name: 'token-compare', version: '1.0.0' }, + }); + const tools = await rpc('tools/list', {}, 60000); + const report = reportCatalog(`Playwright MCP — ${label}`, tools.result?.tools ?? 
[]); + + // Try to measure a representative tools/call too — browser_snapshot + // against a simple page. This captures the per-interaction cost that + // Playwright MCP charges on every step. + try { + await rpc('tools/call', { name: 'browser_navigate', arguments: { url: 'https://example.com' } }, 60000); + const snap = await rpc('tools/call', { name: 'browser_snapshot', arguments: {} }, 60000); + report.snapshotTokens = tokenize(snap.result).tokens; + report.snapshotPage = 'example.com (simple page baseline)'; + } catch (err) { + report.snapshotError = err.message; + } + return report; + }); +} + +// ── Output formatting ─────────────────────────────────────────────────────── + +function fmtRow(s) { + return `${s.name.padEnd(58)} ${String(s.toolCount).padStart(5)} ${String(s.catalogTokens).padStart(7)}`; +} + +(async () => { + console.log('Apples-to-apples token measurement: Provar MCP vs. Playwright MCP\n'); + console.log('Methodology: spawn each server, send initialize → tools/list, count chars,'); + console.log('estimate tokens at ~4 chars/token. 
Numbers reflect what the MCP client'); + console.log('serializes and sends to the LLM as its tool catalog.\n'); + + console.log('Measuring Provar MCP (3 configurations)...'); + const provarStandard = await measureProvar('STANDARD (all groups, full descriptions)', {}); + const provarCompact = await measureProvar('COMPACT (all groups, compact descriptions)', { + PROVAR_MCP_SCHEMA_MODE: 'compact', + }); + const provarAuthoring = await measureProvar('AUTHORING (compact + inspect/connection/validation/authoring only)', { + PROVAR_MCP_SCHEMA_MODE: 'compact', + PROVAR_MCP_TOOLS: 'authoring,inspect,connection,validation', + }); + + console.log('Measuring Playwright MCP (default / out-of-the-box)...'); + let pwDefault; + try { + pwDefault = await measurePlaywright('DEFAULT (out-of-the-box)'); + } catch (err) { + console.error(` ⚠ Playwright MCP measurement failed: ${err.message}`); + pwDefault = null; + } + + console.log('\n══════════════════════════════════════════════════════════════════════════════════'); + console.log(`Scenario Tools ~Tokens`); + console.log('══════════════════════════════════════════════════════════════════════════════════'); + console.log(fmtRow(provarStandard)); + console.log(fmtRow(provarCompact)); + console.log(fmtRow(provarAuthoring)); + if (pwDefault) console.log(fmtRow(pwDefault)); + console.log('══════════════════════════════════════════════════════════════════════════════════\n'); + + if (pwDefault) { + const ratioStd = (pwDefault.catalogTokens / provarStandard.catalogTokens).toFixed(2); + const ratioCpt = (pwDefault.catalogTokens / provarCompact.catalogTokens).toFixed(2); + const ratioAut = (pwDefault.catalogTokens / provarAuthoring.catalogTokens).toFixed(2); + console.log('Tool-catalog ratio (Playwright MCP / Provar MCP):'); + console.log(` vs Provar STANDARD : ${ratioStd}× larger`); + console.log(` vs Provar COMPACT : ${ratioCpt}× larger`); + console.log(` vs Provar AUTHORING: ${ratioAut}× larger\n`); + + if (pwDefault.snapshotTokens) { + 
console.log(`Per-interaction cost (Playwright MCP — ${pwDefault.snapshotPage}):`); + console.log(` browser_snapshot response: ~${pwDefault.snapshotTokens} tokens`); + console.log(` (multiply by interactions per test to project the full session cost)`); + } else if (pwDefault.snapshotError) { + console.log(`Per-interaction measurement skipped: ${pwDefault.snapshotError}`); + } + } + + console.log('\nTop 5 most expensive tools — Provar MCP STANDARD:'); + for (const t of provarStandard.topTools) { + console.log(` ${t.name.padEnd(42)} ~${String(t.tokens).padStart(5)} tokens (desc: ${t.descChars} chars)`); + } + + if (pwDefault) { + console.log('\nTop 5 most expensive tools — Playwright MCP DEFAULT:'); + for (const t of pwDefault.topTools) { + console.log(` ${t.name.padEnd(42)} ~${String(t.tokens).padStart(5)} tokens (desc: ${t.descChars} chars)`); + } + } + + process.exit(0); +})().catch((err) => { + console.error('\nMeasurement error:', err.message); + if (err.stack) console.error(err.stack); + process.exit(1); +}); diff --git a/server.json b/server.json index 1b4354d5..01e3d72e 100644 --- a/server.json +++ b/server.json @@ -14,12 +14,12 @@ "url": "https://github.com/ProvarTesting/provardx-cli", "source": "github" }, - "version": "1.5.0", + "version": "1.5.1", "packages": [ { "registryType": "npm", "identifier": "@provartesting/provardx-cli", - "version": "1.5.0", + "version": "1.5.1", "transport": { "type": "stdio" }, diff --git a/src/mcp/prompts/guidePrompts.ts b/src/mcp/prompts/guidePrompts.ts index 855ac585..14fde43f 100644 --- a/src/mcp/prompts/guidePrompts.ts +++ b/src/mcp/prompts/guidePrompts.ts @@ -263,16 +263,23 @@ Required sequence — do not skip steps: 'author-test': `## Author a New Test Case -1. provar_project_inspect → find coverage gaps before writing -2. provar_automation_metadata_download → if SF metadata is stale (missing fields/objects) -3. provar_pageobject_generate → if a new page object is needed -4. 
provar_pageobject_validate → validate before compile -5. provar_automation_compile → after any page object change -6. provar_testcase_generate → create the test case file -7. provar_testcase_step_edit → add steps (repeat as needed) -8. provar_testcase_validate → MUST pass before adding to a plan -9. provar_testplan_add-instance → add to an existing plan -10. provar_testplan_validate → validate the plan`, +Construct the full step tree in a single \`provar_testcase_generate\` call. +\`provar_testcase_step_edit\` is for amending an existing case, not for +building one step-by-step (that pattern drops scenarios and flattens nesting). + +1. provar_project_inspect → find coverage gaps before writing +2. provar_qualityhub_examples_retrieve → ground in corpus examples for the step types you need +3. provar_automation_metadata_download → if SF metadata is stale (missing fields/objects) +4. provar_pageobject_generate → only if a new page object is needed +5. provar_pageobject_validate → validate before compile +6. provar_automation_compile → after any page object change +7. provar_testcase_generate → single call, pass ALL steps in one payload +8. provar_testcase_validate → MUST pass before adding to a plan +9. provar_testplan_add-instance → add to an existing plan +10. 
provar_testplan_validate → validate the plan + +Use provar_testcase_step_edit only to amend an existing validated test case +(single-step add, attribute fix, debug edit) — never to construct one from scratch.`, 'debug-failures': `## Debug Failing Tests @@ -319,11 +326,14 @@ provar_pageobject_validate provar_nitrox_generate OR provar_nitrox_patch └── provar_nitrox_validate (always validate after) -provar_testcase_generate OR provar_testcase_step_edit +provar_testcase_generate (construct full case — pass ALL steps in one call) └── provar_testcase_validate └── provar_testplan_add-instance └── provar_testplan_validate +provar_testcase_step_edit (amend an existing validated case only — never construct) + └── provar_testcase_validate + ### Safe to run in parallel (no dependency between them) - provar_project_inspect + provar_connection_list - provar_pageobject_validate on multiple files diff --git a/src/mcp/server.ts b/src/mcp/server.ts index cbd906f0..20429dba 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -35,6 +35,39 @@ import { registerAllNitroXTools } from './tools/nitroXTools.js'; import { registerAllTestCaseStepTools } from './tools/testCaseStepTools.js'; import { registerAllConnectionTools } from './tools/connectionTools.js'; import { registerAllPrompts } from './prompts/index.js'; +import { + createDepthGuardState, + wrapWithDepthGuard, + type AnyToolCallback, + type DepthGuardState, +} from './utils/tokenMeta.js'; +import { desc } from './tools/descHelper.js'; + +// ── Tool group registry ─────────────────────────────────────────────────────── +// Groups are keyed in lowercase so they match the lowercased env var values. 
+const TOOL_GROUPS: Record void>> = { + nitrox: [registerAllNitroXTools], + automation: [registerAllAutomationTools], + qualityhub: [registerAllQualityHubTools, registerAllQualityHubApiTools, registerAllDefectTools], + validation: [ + registerProjectValidateFromPath, + registerAllAntTools, + registerAllPropertiesTools, + registerTestCaseValidate, + registerTestSuiteValidate, + registerTestPlanValidate, + registerPageObjectValidate, + ], + authoring: [ + registerTestCaseGenerate, + registerPageObjectGenerate, + registerAllTestCaseStepTools, + registerAllTestPlanTools, + ], + inspect: [registerProjectInspect], + connection: [registerAllConnectionTools], + rca: [registerAllRcaTools], +}; export interface ServerConfig { allowedPaths: string[]; @@ -45,6 +78,43 @@ export interface ServerConfig { }; } +export function parseActiveGroups(): Set | null { + const env = process.env['PROVAR_MCP_TOOLS']; + if (!env?.trim()) return null; + const requested = new Set( + env + .split(',') + .map((g) => g.trim().toLowerCase()) + .filter(Boolean) + ); + if (requested.size === 0) { + log('warn', 'PROVAR_MCP_TOOLS was set but contained no valid group names — activating all groups', { raw: env }); + return null; + } + const known = new Set(Object.keys(TOOL_GROUPS)); + const matched = new Set(); + const unknown: string[] = []; + for (const g of requested) { + if (known.has(g)) matched.add(g); + else unknown.push(g); + } + if (unknown.length > 0) { + log('warn', 'PROVAR_MCP_TOOLS contains unknown group names — they will be ignored', { + raw: env, + unknown, + known: [...known], + }); + } + if (matched.size === 0) { + log('warn', 'PROVAR_MCP_TOOLS matched no known group names — activating all groups', { + raw: env, + known: [...known], + }); + return null; + } + return matched; +} + export function createProvarMcpServer(config: ServerConfig): McpServer { log('info', 'Creating Provar MCP server', { allowedPaths: config.allowedPaths }); @@ -58,10 +128,16 @@ export function 
createProvarMcpServer(config: ServerConfig): McpServer { 'provardx_ping', { title: 'Ping MCP Server', - description: + description: desc( 'Sanity-check tool. Echoes back a message with a timestamp. Use this to verify the MCP server is reachable before calling other tools.', + 'Echo message back with timestamp; verify MCP server is reachable.' + ), inputSchema: { - message: z.string().optional().default('ping').describe('Optional message to echo back'), + message: z + .string() + .optional() + .default('ping') + .describe(desc('Optional message to echo back', 'message to echo')), }, }, ({ message }) => { @@ -80,26 +156,21 @@ export function createProvarMcpServer(config: ServerConfig): McpServer { } ); + // ── Depth-guard middleware (PDX-474) ───────────────────────────────────────── + const rawLimit = parseInt(process.env['PROVAR_MCP_MAX_TOOL_DEPTH'] ?? '50', 10); + const depthLimit = Number.isNaN(rawLimit) || rawLimit <= 0 ? 50 : rawLimit; + const depthState = createDepthGuardState(); + patchWithMiddleware(server, depthState, depthLimit); + // ── Provar tools ───────────────────────────────────────────────────────────── - registerProjectInspect(server, config); - registerPageObjectGenerate(server, config); - registerPageObjectValidate(server, config); - registerTestCaseGenerate(server, config); - registerTestCaseValidate(server, config); - registerTestSuiteValidate(server); - registerTestPlanValidate(server); - registerProjectValidateFromPath(server, config); - registerAllPropertiesTools(server, config); - registerAllQualityHubTools(server); - registerAllQualityHubApiTools(server); - registerAllAutomationTools(server, config); - registerAllDefectTools(server); - registerAllAntTools(server, config); - registerAllRcaTools(server, config); - registerAllTestPlanTools(server, config); - registerAllNitroXTools(server, config); - registerAllTestCaseStepTools(server, config); - registerAllConnectionTools(server, config); + const activeGroups = parseActiveGroups(); + for 
(const [group, registrars] of Object.entries(TOOL_GROUPS)) { + if (activeGroups === null || activeGroups.has(group)) { + for (const register of registrars) { + register(server, config); + } + } + } // ── Provar prompts ─────────────────────────────────────────────────────────── registerAllPrompts(server); @@ -216,6 +287,15 @@ export function createProvarMcpServer(config: ServerConfig): McpServer { return server; } +function patchWithMiddleware(server: McpServer, state: DepthGuardState, limit: number): void { + const orig = server.registerTool.bind(server); + type RegisterToolFn = (n: string, c: unknown, h: AnyToolCallback) => unknown; + // Cast through unknown to patch the overloaded method without triggering no-unsafe-any. + const patchable = server as unknown as { registerTool: RegisterToolFn }; + patchable.registerTool = (name: string, config: unknown, handler: AnyToolCallback): unknown => + (orig as unknown as RegisterToolFn)(name, config, wrapWithDepthGuard(name, handler, state, limit)); +} + /** * Resolve the docs directory for bundled MCP Markdown resources. 
* In compiled output (lib/mcp/) the sibling docs/ dir exists; in dev/ts-node diff --git a/src/mcp/tools/antTools.ts b/src/mcp/tools/antTools.ts index e7c64c32..9efc0c93 100644 --- a/src/mcp/tools/antTools.ts +++ b/src/mcp/tools/antTools.ts @@ -15,6 +15,7 @@ import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId, type ValidationIssue } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { desc } from './descHelper.js'; // ── Sub-schemas ─────────────────────────────────────────────────────────────── @@ -70,72 +71,109 @@ export function registerAntGenerate(server: McpServer, config: ServerConfig): vo 'provar_ant_generate', { title: 'Generate ANT Build File', - description: [ - 'Generate a Provar ANT build.xml file.', - 'Produces the standard skeleton with Provar-Compile and Run-Test-Case tasks.', - 'Supports targeting tests by project folder, plan folder, or specific .testcase files via filesets.', - 'Returns XML content. Writes to disk only when dry_run=false.', - ].join(' '), + description: desc( + [ + 'Generate a Provar ANT build.xml file.', + 'Produces the standard skeleton with Provar-Compile and Run-Test-Case tasks.', + 'Supports targeting tests by project folder, plan folder, or specific .testcase files via filesets.', + 'Returns XML content. Writes to disk only when dry_run=false.', + ].join(' '), + 'Generate a Provar ANT build.xml with Provar-Compile and Run-Test-Case tasks.' + ), inputSchema: { // ── Core paths ────────────────────────────────────────────────────────── provar_home: z .string() .describe( - 'Absolute path to the Provar installation directory (e.g. "C:/Program Files/Provar/"). Used for provar.home property and ant taskdef classpaths.' + desc( + 'Absolute path to the Provar installation directory (e.g. "C:/Program Files/Provar/"). 
Used for provar.home property and ant taskdef classpaths.', + 'string, absolute path to Provar installation' + ) ), project_path: z .string() .default('..') - .describe('Path to the Provar test project root. Defaults to ".." (parent of the ANT folder).'), + .describe( + desc( + 'Path to the Provar test project root. Defaults to ".." (parent of the ANT folder).', + 'string, path to project root' + ) + ), results_path: z .string() .default('../ANT/Results') - .describe('Path where test results are written. Defaults to "../ANT/Results".'), + .describe( + desc('Path where test results are written. Defaults to "../ANT/Results".', 'string, path for test results') + ), project_cache_path: z .string() .optional() .describe( - 'Path to the .provarCaches directory. Defaults to "../../.provarCaches" relative to the ANT folder.' + desc( + 'Path to the .provarCaches directory. Defaults to "../../.provarCaches" relative to the ANT folder.', + 'string, optional; path to .provarCaches' + ) ), license_path: z .string() .optional() - .describe('Path to the Provar .licenses directory (e.g. "${env.PROVAR_HOME}/.licenses").'), + .describe( + desc( + 'Path to the Provar .licenses directory (e.g. "${env.PROVAR_HOME}/.licenses").', + 'string, optional; path to .licenses dir' + ) + ), smtp_path: z .string() .optional() - .describe('Path to the Provar .smtp directory (e.g. "${env.PROVAR_HOME}/.smtp").'), + .describe( + desc( + 'Path to the Provar .smtp directory (e.g. "${env.PROVAR_HOME}/.smtp").', + 'string, optional; path to .smtp dir' + ) + ), // ── Test selection ────────────────────────────────────────────────────── filesets: z .array(FilesetSchema) .min(1) .describe( - 'One or more filesets defining which tests to run. ' + - 'To run all tests under a folder: { dir: "../tests" }. ' + - 'To run a plan: { id: "testplan", dir: "../plans/MyPlan" }. ' + - 'To run specific test cases: { dir: "../tests/Suite", includes: ["MyTest.testcase"] }.' 
+ desc( + 'One or more filesets defining which tests to run. ' + + 'To run all tests under a folder: { dir: "../tests" }. ' + + 'To run a plan: { id: "testplan", dir: "../plans/MyPlan" }. ' + + 'To run specific test cases: { dir: "../tests/Suite", includes: ["MyTest.testcase"] }.', + 'array, min 1; filesets defining which tests to run' + ) ), // ── Browser / environment ─────────────────────────────────────────────── web_browser: z .enum(['Chrome', 'Chrome_Headless', 'Firefox', 'Edge', 'Edge_Legacy', 'Safari', 'IE']) .default('Chrome') - .describe('Web browser to use for test execution.'), + .describe( + desc('Web browser to use for test execution.', 'enum Chrome|Chrome_Headless|Firefox|Edge|Safari|IE') + ), web_browser_configuration: z .string() .default('Full Screen') - .describe('Browser window configuration (e.g. "Full Screen").'), - web_browser_provider_name: z.string().default('Desktop').describe('Browser provider name (e.g. "Desktop").'), + .describe(desc('Browser window configuration (e.g. "Full Screen").', 'string, browser window config')), + web_browser_provider_name: z + .string() + .default('Desktop') + .describe(desc('Browser provider name (e.g. "Desktop").', 'string, browser provider name')), web_browser_device_name: z .string() .default('Full Screen') - .describe('Browser device name (e.g. "Full Screen").'), + .describe(desc('Browser device name (e.g. "Full Screen").', 'string, browser device name')), test_environment: z .string() .default('') .describe( - 'Named test environment to use (must match a connection in the project). Empty string uses default.' + desc( + 'Named test environment to use (must match a connection in the project). 
Empty string uses default.', + 'string, optional; named test environment' + ) ), // ── Cache / metadata ──────────────────────────────────────────────────── @@ -143,7 +181,10 @@ export function registerAntGenerate(server: McpServer, config: ServerConfig): vo .enum(['Reuse', 'Refresh', 'Reload']) .default('Reuse') .describe( - 'Salesforce metadata cache strategy: Reuse (fastest, uses cached), Refresh (re-downloads), Reload (clears and re-downloads).' + desc( + 'Salesforce metadata cache strategy: Reuse (fastest, uses cached), Refresh (re-downloads), Reload (clears and re-downloads).', + 'enum Reuse|Refresh|Reload' + ) ), // ── Output / logging ──────────────────────────────────────────────────── @@ -151,79 +192,132 @@ export function registerAntGenerate(server: McpServer, config: ServerConfig): vo .enum(['Increment', 'Replace', 'Reuse']) .default('Increment') .describe( - 'How to handle the results folder when it already exists: Increment (new subfolder), Replace (overwrite), Reuse (append).' 
+ desc( + 'How to handle the results folder when it already exists: Increment (new subfolder), Replace (overwrite), Reuse (append).', + 'enum Increment|Replace|Reuse' + ) ), test_output_level: z .enum(['BASIC', 'WARNING', 'DEBUG']) .default('BASIC') - .describe('Verbosity level for test output logs.'), + .describe(desc('Verbosity level for test output logs.', 'enum BASIC|WARNING|DEBUG')), plugin_output_level: z .enum(['BASIC', 'WARNING', 'DEBUG']) .default('WARNING') - .describe('Verbosity level for plugin output logs.'), + .describe(desc('Verbosity level for plugin output logs.', 'enum BASIC|WARNING|DEBUG')), // ── Execution behaviour ───────────────────────────────────────────────── stop_test_run_on_error: z .boolean() .default(false) - .describe('Abort the entire test run when any test case fails.'), + .describe(desc('Abort the entire test run when any test case fails.', 'bool, optional; abort on failure')), exclude_callable_test_cases: z .boolean() .default(true) - .describe('Skip test cases marked as callable (library/helper) when true.'), + .describe( + desc( + 'Skip test cases marked as callable (library/helper) when true.', + 'bool, optional; skip callable tests' + ) + ), dont_fail_build: z .boolean() .optional() .describe( - 'When true, the ANT build does not fail even if tests fail. Useful for CI pipelines that collect results separately.' + desc( + 'When true, the ANT build does not fail even if tests fail. 
Useful for CI pipelines that collect results separately.', + 'bool, optional; skip build failure on test fail' + ) ), - invoke_test_run_monitor: z.boolean().default(true).describe('Enable the Provar test run monitor.'), + invoke_test_run_monitor: z + .boolean() + .default(true) + .describe(desc('Enable the Provar test run monitor.', 'bool, optional; enable test run monitor')), // ── Secrets / security ────────────────────────────────────────────────── secrets_password: z .string() .default('${env.ProvarSecretsPassword}') .describe( - 'Encryption key used to decrypt the Provar .secrets file (the password string itself, not a file path). Defaults to reading from the ProvarSecretsPassword environment variable.' + desc( + 'Encryption key used to decrypt the Provar .secrets file (the password string itself, not a file path). Defaults to reading from the ProvarSecretsPassword environment variable.', + 'string, NOT a file path; encryption key for .secrets' + ) ), test_environment_secrets_password: z .string() .optional() .describe( - 'Per-environment secrets password. Defaults to reading from the ProvarSecretsPassword_EnvName environment variable.' + desc( + 'Per-environment secrets password. 
Defaults to reading from the ProvarSecretsPassword_EnvName environment variable.', + 'string, optional; per-environment secrets key' + ) ), // ── Test Cycle ────────────────────────────────────────────────────────── - test_cycle_path: z.string().optional().describe('Path to a TestCycle folder (used with test cycle reporting).'), + test_cycle_path: z + .string() + .optional() + .describe( + desc( + 'Path to a TestCycle folder (used with test cycle reporting).', + 'string, optional; path to TestCycle folder' + ) + ), test_cycle_run_type: z .enum(['ALL', 'FAILED', 'NEW']) .optional() - .describe('Which tests in the cycle to run (ALL, FAILED, NEW).'), + .describe(desc('Which tests in the cycle to run (ALL, FAILED, NEW).', 'enum ALL|FAILED|NEW, optional')), // ── Plan features ─────────────────────────────────────────────────────── plan_features: z .array(PlanFeatureSchema) .optional() .describe( - 'Output and notification features to enable/disable (e.g. PDF, PIECHART, EMAIL). ' + - 'Only meaningful when running by test plan.' + desc( + 'Output and notification features to enable/disable (e.g. PDF, PIECHART, EMAIL). ' + + 'Only meaningful when running by test plan.', + 'array, optional; plan output/notification features' + ) ), // ── Email / attachment reporting ──────────────────────────────────────── email_properties: EmailPropertiesSchema.optional().describe( - 'Email notification settings. Omit to exclude from the XML.' + desc( + 'Email notification settings. Omit to exclude from the XML.', + 'object, optional; email notification settings' + ) ), attachment_properties: AttachmentPropertiesSchema.optional().describe( - 'Attachment/report content settings. Omit to exclude from the XML.' + desc( + 'Attachment/report content settings. 
Omit to exclude from the XML.', + 'object, optional; attachment/report content settings' + ) ), // ── File output ───────────────────────────────────────────────────────── output_path: z .string() .optional() - .describe('Where to write the build.xml file (returned in response). Required when dry_run=false.'), - overwrite: z.boolean().default(false).describe('Overwrite output_path if the file already exists.'), - dry_run: z.boolean().default(true).describe('true = return XML only (default); false = write to output_path.'), + .describe( + desc( + 'Where to write the build.xml file (returned in response). Required when dry_run=false.', + 'string, optional; absolute path for build.xml output' + ) + ), + overwrite: z + .boolean() + .default(false) + .describe(desc('Overwrite output_path if the file already exists.', 'bool, optional; overwrite if exists')), + dry_run: z + .boolean() + .default(true) + .describe( + desc( + 'true = return XML only (default); false = write to output_path.', + 'bool, optional; true=return only, false=write' + ) + ), }, }, (input) => { @@ -299,15 +393,24 @@ export function registerAntValidate(server: McpServer, config: ServerConfig): vo 'provar_ant_validate', { title: 'Validate ANT Build File', - description: [ - 'Validate a Provar ANT build.xml for structural correctness.', - 'Checks XML well-formedness, required declarations, step,', - ' with required attributes (provarHome, projectPath, resultsPath),', - 'and at least one child. Returns is_valid, issues list, and a validity_score.', - ].join(' '), + description: desc( + [ + 'Validate a Provar ANT build.xml for structural correctness.', + 'Checks XML well-formedness, required declarations, step,', + ' with required attributes (provarHome, projectPath, resultsPath),', + 'and at least one child. Returns is_valid, issues list, and a validity_score.', + ].join(' '), + 'Validate a Provar ANT build.xml for structural correctness.' 
+ ), inputSchema: { - content: z.string().optional().describe('XML content to validate directly'), - file_path: z.string().optional().describe('Path to the build.xml file to validate'), + content: z + .string() + .optional() + .describe(desc('XML content to validate directly', 'string, optional; inline XML')), + file_path: z + .string() + .optional() + .describe(desc('Path to the build.xml file to validate', 'string, optional; absolute path to build.xml')), }, }, ({ content, file_path }) => { diff --git a/src/mcp/tools/automationTools.ts b/src/mcp/tools/automationTools.ts index b631c760..ccc712df 100644 --- a/src/mcp/tools/automationTools.ts +++ b/src/mcp/tools/automationTools.ts @@ -17,6 +17,7 @@ import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { parseJUnitResults } from './antTools.js'; import { runSfCommand } from './sfSpawn.js'; +import { desc } from './descHelper.js'; // Re-export sf resolution helpers so existing test imports from automationTools continue to work export { getSfCommonPaths, needsWindowsShell, setSfPathCacheForTesting, setSfPlatformForTesting } from './sfSpawn.js'; @@ -46,20 +47,33 @@ export function registerAutomationConfigLoad(server: McpServer, config: ServerCo 'provar_automation_config_load', { title: 'Load Automation Config', - description: [ - 'Register a provardx-properties.json file as the active Provar configuration.', - 'Invokes `sf provar automation config load --properties-file `, writing the path to ~/.sf/config.json.', - 'REQUIRED before provar_automation_compile or provar_automation_testrun — without this step those commands fail with MISSING_FILE.', - 'Typical workflow: provar_automation_config_load → provar_automation_compile → provar_automation_testrun.', - ].join(' '), + description: desc( + [ + 'Register a provardx-properties.json file as the active Provar configuration.', + 'Invokes `sf provar automation config load --properties-file `, 
writing the path to ~/.sf/config.json.', + 'REQUIRED before provar_automation_compile or provar_automation_testrun — without this step those commands fail with MISSING_FILE.', + 'Typical workflow: provar_automation_config_load → provar_automation_compile → provar_automation_testrun.', + ].join(' '), + 'Register a provardx-properties.json as active config; required before compile/testrun.' + ), inputSchema: { properties_path: z .string() - .describe('Absolute path to the provardx-properties.json file to register as active configuration'), + .describe( + desc( + 'Absolute path to the provardx-properties.json file to register as active configuration', + 'string, absolute path to provardx-properties.json' + ) + ), sf_path: z .string() .optional() - .describe('Path to the sf CLI executable when not in PATH (e.g. "~/.nvm/versions/node/v22.0.0/bin/sf")'), + .describe( + desc( + 'Path to the sf CLI executable when not in PATH (e.g. "~/.nvm/versions/node/v22.0.0/bin/sf")', + 'string, optional; path to sf CLI' + ) + ), }, }, ({ properties_path, sf_path }) => { @@ -217,26 +231,39 @@ export function registerAutomationTestRun(server: McpServer, config: ServerConfi 'provar_automation_testrun', { title: 'Run Tests', - description: [ - 'Trigger a LOCAL Provar automation test run using installed Provar binaries. 
Invokes `sf provar automation test run`.', - 'PREREQUISITE: Run provar_automation_config_load first to register a provardx-properties.json — without this the command fails with MISSING_FILE.', - 'Requires Provar to be installed locally and provarHome set correctly in the properties file.', - 'Use provar_automation_setup first if Provar is not yet installed.', - 'For grid/CI execution via Provar Quality Hub instead of running locally, use provar_qualityhub_testrun.', - 'Output buffer: a 50 MB maxBuffer is set so ENOBUFS on verbose Provar runs is now rare.', - 'If ENOBUFS still occurs (extremely verbose logging), run `sf provar automation test run --json` directly in the terminal and pipe or tail the output instead of retrying this tool.', - 'Typical local AI loop: config.load → compile → testrun → inspect results.', - ].join(' '), + description: desc( + [ + 'Trigger a LOCAL Provar automation test run using installed Provar binaries. Invokes `sf provar automation test run`.', + 'PREREQUISITE: Run provar_automation_config_load first to register a provardx-properties.json — without this the command fails with MISSING_FILE.', + 'Requires Provar to be installed locally and provarHome set correctly in the properties file.', + 'Use provar_automation_setup first if Provar is not yet installed.', + 'For grid/CI execution via Provar Quality Hub instead of running locally, use provar_qualityhub_testrun.', + 'Output buffer: a 50 MB maxBuffer is set so ENOBUFS on verbose Provar runs is now rare.', + 'If ENOBUFS still occurs (extremely verbose logging), run `sf provar automation test run --json` directly in the terminal and pipe or tail the output instead of retrying this tool.', + 'Typical local AI loop: config.load → compile → testrun → inspect results.', + ].join(' '), + 'Run local Provar tests via sf CLI; requires config_load first.' + ), inputSchema: { flags: z .array(z.string()) .optional() .default([]) - .describe('Raw CLI flags to forward (e.g. 
["--project-path", "/path/to/project"])'), + .describe( + desc( + 'Raw CLI flags to forward (e.g. ["--project-path", "/path/to/project"])', + 'array, optional; raw CLI flags' + ) + ), sf_path: z .string() .optional() - .describe('Path to the sf CLI executable when not in PATH (e.g. "~/.nvm/versions/node/v22.0.0/bin/sf")'), + .describe( + desc( + 'Path to the sf CLI executable when not in PATH (e.g. "~/.nvm/versions/node/v22.0.0/bin/sf")', + 'string, optional; path to sf CLI' + ) + ), }, }, ({ flags, sf_path }) => { @@ -296,21 +323,34 @@ export function registerAutomationCompile(server: McpServer): void { 'provar_automation_compile', { title: 'Compile Test Assets', - description: [ - 'Compile a Provar automation project. Invokes `sf provar automation project compile`.', - 'PREREQUISITE: Run provar_automation_config_load first to register a provardx-properties.json — without this the command fails with MISSING_FILE.', - 'Run this before triggering a test run after modifying test cases.', - ].join(' '), + description: desc( + [ + 'Compile a Provar automation project. Invokes `sf provar automation project compile`.', + 'PREREQUISITE: Run provar_automation_config_load first to register a provardx-properties.json — without this the command fails with MISSING_FILE.', + 'Run this before triggering a test run after modifying test cases.', + ].join(' '), + 'Compile a Provar project; requires config_load first.' + ), inputSchema: { flags: z .array(z.string()) .optional() .default([]) - .describe('Raw CLI flags to forward (e.g. ["--project-path", "/path/to/project"])'), + .describe( + desc( + 'Raw CLI flags to forward (e.g. ["--project-path", "/path/to/project"])', + 'array, optional; raw CLI flags' + ) + ), sf_path: z .string() .optional() - .describe('Path to the sf CLI executable when not in PATH (e.g. "~/.nvm/versions/node/v22.0.0/bin/sf")'), + .describe( + desc( + 'Path to the sf CLI executable when not in PATH (e.g. 
"~/.nvm/versions/node/v22.0.0/bin/sf")', + 'string, optional; path to sf CLI' + ) + ), }, }, ({ flags, sf_path }) => { @@ -355,27 +395,38 @@ export function registerAutomationMetadataDownload(server: McpServer): void { 'provar_automation_metadata_download', { title: 'Download Salesforce Metadata', - description: [ - 'Download Salesforce metadata for one or more connections into a Provar project.', - 'Invokes `sf provar automation metadata download`.', - 'PREREQUISITE: Call provar_automation_config_load first — without it the command fails with MISSING_FILE.', - 'Use the -c flag to specify connections: flags: ["-c", "ConnectionName1,ConnectionName2"].', - 'Connection names are case-sensitive and must match the names defined in the Provar project.', - 'If the download fails with [DOWNLOAD_ERROR], this is almost always a Salesforce authentication issue —', - 'check that the credentials in the project .secrets file are current and that any referenced scratch orgs have not expired.', - ].join(' '), + description: desc( + [ + 'Download Salesforce metadata for one or more connections into a Provar project.', + 'Invokes `sf provar automation metadata download`.', + 'PREREQUISITE: Call provar_automation_config_load first — without it the command fails with MISSING_FILE.', + 'Use the -c flag to specify connections: flags: ["-c", "ConnectionName1,ConnectionName2"].', + 'Connection names are case-sensitive and must match the names defined in the Provar project.', + 'If the download fails with [DOWNLOAD_ERROR], this is almost always a Salesforce authentication issue —', + 'check that the credentials in the project .secrets file are current and that any referenced scratch orgs have not expired.', + ].join(' '), + 'Download Salesforce metadata for project connections; requires config_load first.' + ), inputSchema: { flags: z .array(z.string()) .optional() .default([]) .describe( - 'Raw CLI flags to forward. 
Use ["-c", "Name1,Name2"] (or the equivalent --connections form) to target specific connections. Example: ["-c", "MyOrg,SandboxOrg"]' + desc( + 'Raw CLI flags to forward. Use ["-c", "Name1,Name2"] (or the equivalent --connections form) to target specific connections. Example: ["-c", "MyOrg,SandboxOrg"]', + 'array, optional; raw CLI flags e.g. ["-c", "ConnName"]' + ) ), sf_path: z .string() .optional() - .describe('Path to the sf CLI executable when not in PATH (e.g. "~/.nvm/versions/node/v22.0.0/bin/sf")'), + .describe( + desc( + 'Path to the sf CLI executable when not in PATH (e.g. "~/.nvm/versions/node/v22.0.0/bin/sf")', + 'string, optional; path to sf CLI' + ) + ), }, }, ({ flags, sf_path }) => { @@ -503,32 +554,48 @@ export function registerAutomationSetup(server: McpServer): void { 'provar_automation_setup', { title: 'Install Provar Automation', - description: [ - 'Download and install Provar Automation binaries locally. Invokes `sf provar automation setup`.', - 'Before downloading, checks for existing Provar installations in:', - ' • PROVAR_HOME environment variable', - ' • ./ProvarHome (default CLI install location)', - ' • C:\\Program Files\\Provar* (Windows system installs)', - ' • /Applications/Provar* (macOS app installs)', - 'If an existing installation is found, returns its path so you can set provarHome in the properties file — skipping the download unless force is true.', - 'After a successful install, update the provarHome property in provardx-properties.json to the returned install_path using provar_properties_set.', - ].join(' '), + description: desc( + [ + 'Download and install Provar Automation binaries locally. 
Invokes `sf provar automation setup`.', + 'Before downloading, checks for existing Provar installations in:', + ' • PROVAR_HOME environment variable', + ' • ./ProvarHome (default CLI install location)', + ' • C:\\Program Files\\Provar* (Windows system installs)', + ' • /Applications/Provar* (macOS app installs)', + 'If an existing installation is found, returns its path so you can set provarHome in the properties file — skipping the download unless force is true.', + 'After a successful install, update the provarHome property in provardx-properties.json to the returned install_path using provar_properties_set.', + ].join(' '), + 'Download and install Provar Automation binaries; skips if already installed.' + ), inputSchema: { version: z .string() .optional() .describe( - 'Specific Provar Automation version to install, e.g. "2.12.0". Omit to install the latest release.' + desc( + 'Specific Provar Automation version to install, e.g. "2.12.0". Omit to install the latest release.', + 'string, optional; version to install e.g. "2.12.0"' + ) ), force: z .boolean() .optional() .default(false) - .describe('Force a fresh download even if an existing installation is already detected (default: false).'), + .describe( + desc( + 'Force a fresh download even if an existing installation is already detected (default: false).', + 'bool, optional; force re-download' + ) + ), sf_path: z .string() .optional() - .describe('Path to the sf CLI executable when not in PATH (e.g. "~/.nvm/versions/node/v22.0.0/bin/sf")'), + .describe( + desc( + 'Path to the sf CLI executable when not in PATH (e.g. 
"~/.nvm/versions/node/v22.0.0/bin/sf")', + 'string, optional; path to sf CLI' + ) + ), }, }, ({ version, force, sf_path }) => { diff --git a/src/mcp/tools/connectionTools.ts b/src/mcp/tools/connectionTools.ts index 30f954e6..398b2407 100644 --- a/src/mcp/tools/connectionTools.ts +++ b/src/mcp/tools/connectionTools.ts @@ -15,6 +15,8 @@ import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { maskFields, parseFieldsParam } from '../utils/fieldMask.js'; +import { desc } from './descHelper.js'; // ── Types ───────────────────────────────────────────────────────────────────── @@ -57,7 +59,12 @@ const TP_PARSER = new XMLParser({ ignoreAttributes: false, attributeNamePrefix: '@_', parseAttributeValue: false, - isArray: (name): boolean => name === 'connectionClass' || name === 'connection' || name === 'environment', + isArray: (name): boolean => + name === 'connectionClass' || + name === 'connection' || + name === 'environment' || + name === 'connectionUrl' || + name === 'association', }); class XmlParseError extends Error { @@ -75,33 +82,73 @@ function parseTestProjectXml(content: string): Record { return raw !== null && typeof raw === 'object' ? 
(raw as Record) : {}; } -function parseConnectionList(content: string): ConnectionEntry[] { - const tp = parseTestProjectXml(content); +interface ConnectionInfo { + name: string; + className: string; + defaultUrl?: string; + urlsByEnvId: Map; +} + +function buildConnectionMap(tp: Record): Map { + const map = new Map(); const cc = tp['connectionClasses']; - if (!cc || typeof cc !== 'object') return []; + if (!cc || typeof cc !== 'object') return map; const classesRaw = (cc as Record)['connectionClass']; - if (!classesRaw) return []; - const classes = classesRaw as Array>; + if (!Array.isArray(classesRaw)) return map; - const connections: ConnectionEntry[] = []; - for (const cls of classes) { + for (const cls of classesRaw as Array>) { const className = cls['@_name'] as string | undefined; if (!className) continue; - const connsRaw = cls['connection']; - if (!connsRaw) continue; + // Real .testproject XML nests each inside a wrapper. + const connsWrap = cls['connections'] as Record | undefined; + const connsRaw = connsWrap?.['connection']; + if (!Array.isArray(connsRaw)) continue; for (const conn of connsRaw as Array>) { - const connName = conn['@_name'] as string | undefined; - if (!connName) continue; - const url = conn['@_url'] as string | undefined; - connections.push({ - name: connName, - type: classToType(className), - ...(url ? 
{ url } : {}), - sso_configured: className === 'sso', - }); + const id = conn['@_id'] as string | undefined; + const name = conn['@_name'] as string | undefined; + if (!name) continue; + + let defaultUrl: string | undefined; + const urlsByEnvId = new Map(); + const urlsWrap = conn['connectionUrls'] as Record | undefined; + const urlsRaw = urlsWrap?.['connectionUrl']; + if (Array.isArray(urlsRaw)) { + for (const u of urlsRaw as Array>) { + const url = u['@_url'] as string | undefined; + if (!url) continue; + const envId = u['@_envId'] as string | undefined; + // The base entry (no @_envId) is the connection's default URL; + // entries with @_envId are environment-specific overrides keyed by env GUID. + if (envId) urlsByEnvId.set(envId, url); + else if (defaultUrl === undefined) defaultUrl = url; + } + } + + const info: ConnectionInfo = { name, className, defaultUrl, urlsByEnvId }; + if (id) map.set(id, info); + // Also key by name so name-based lookups (e.g. legacy callers) still work. + map.set(`name:${name}`, info); } } + return map; +} + +function parseConnectionList(content: string): ConnectionEntry[] { + const tp = parseTestProjectXml(content); + const map = buildConnectionMap(tp); + const connections: ConnectionEntry[] = []; + const seen = new Set(); + for (const info of map.values()) { + if (seen.has(info)) continue; + seen.add(info); + connections.push({ + name: info.name, + type: classToType(info.className), + ...(info.defaultUrl ? 
{ url: info.defaultUrl } : {}), + sso_configured: info.className === 'sso', + }); + } return connections; } @@ -111,18 +158,38 @@ function parseEnvironmentList(content: string): EnvironmentEntry[] { if (!envSection || typeof envSection !== 'object') return []; const envsRaw = (envSection as Record)['environment']; - if (!envsRaw) return []; + if (!Array.isArray(envsRaw)) return []; + const connectionMap = buildConnectionMap(tp); const environments: EnvironmentEntry[] = []; for (const env of envsRaw as Array>) { const name = env['@_name'] as string | undefined; if (!name) continue; - const connection = env['@_connectionName'] as string | undefined; - const url = env['@_url'] as string | undefined; + const envGuid = env['@_guid'] as string | undefined; + + let connectionName = ''; + let envUrl: string | undefined; + // associations may be missing, an empty string (no associations), or an object wrapping an array. + const assocs = env['associations']; + if (assocs !== null && typeof assocs === 'object') { + const assocsRaw = (assocs as Record)['association']; + if (Array.isArray(assocsRaw) && assocsRaw.length > 0) { + const first = assocsRaw[0] as Record; + const connId = first['@_connectionId'] as string | undefined; + if (connId) { + const info = connectionMap.get(connId); + if (info) { + connectionName = info.name; + if (envGuid) envUrl = info.urlsByEnvId.get(envGuid); + } + } + } + } + environments.push({ name, - connection: connection ?? '', - ...(url ? { url } : {}), + connection: connectionName, + ...(envUrl ? 
{ url: envUrl } : {}), }); } return environments; @@ -135,20 +202,39 @@ export function registerConnectionList(server: McpServer, config: ServerConfig): 'provar_connection_list', { title: 'List Connections', - description: [ - 'List all connections and named environments defined in the .testproject file.', - 'Use this before generating test cases or page objects to get the correct connection names.', - 'Returns connections (name, type, url, sso_configured) and environments (name, connection, url).', - 'Prerequisite: the project must have a .testproject file — run provar_project_validate first if unsure.', - 'Security: only connection names, types, and URLs are returned — credential values from .secrets are never included.', - ].join(' '), + description: desc( + [ + 'List all connections and named environments defined in the .testproject file.', + 'Use this before generating test cases or page objects to get the correct connection names.', + 'Returns connections (name, type, url, sso_configured) and environments (name, connection, url).', + 'Prerequisite: the project must have a .testproject file — run provar_project_validate first if unsure.', + 'Security: only connection names, types, and URLs are returned — credential values from .secrets are never included.', + ].join(' '), + 'List connections and environments from the .testproject file.' + ), inputSchema: { project_path: z .string() - .describe('Absolute or relative path to the Provar project root directory (must be within --allowed-paths)'), + .describe( + desc( + 'Absolute or relative path to the Provar project root directory (must be within --allowed-paths)', + 'string, absolute path to project root' + ) + ), + fields: z + .string() + .optional() + .describe( + desc( + 'Comma-separated list of top-level response keys to retain (e.g. "connections,summary"). ' + + 'Supports dot notation for nested filtering (e.g. "connections.name,connections.type"). ' + + 'Unknown field names are silently ignored. 
Omit for full response.', + 'string, optional; comma-separated keys to keep (supports dot notation)' + ) + ), }, }, - ({ project_path }) => { + ({ project_path, fields }) => { const requestId = makeRequestId(); log('info', 'provar_connection_list', { requestId, project_path }); @@ -186,7 +272,7 @@ export function registerConnectionList(server: McpServer, config: ServerConfig): const connections = parseConnectionList(content); const environments = parseEnvironmentList(content); - const result = { + let result: Record = { requestId, project_path: resolvedPath, connections, @@ -196,6 +282,12 @@ export function registerConnectionList(server: McpServer, config: ServerConfig): environment_count: environments.length, }, }; + + const fieldList = parseFieldsParam(fields); + if (fieldList) { + result = maskFields(result, fieldList) as Record; + } + return { content: [{ type: 'text' as const, text: JSON.stringify(result) }], structuredContent: result, diff --git a/src/mcp/tools/defectTools.ts b/src/mcp/tools/defectTools.ts index 81ca6a70..feb44c10 100644 --- a/src/mcp/tools/defectTools.ts +++ b/src/mcp/tools/defectTools.ts @@ -11,6 +11,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; import { runSfCommand, soqlEscape } from './sfSpawn.js'; +import { desc } from './descHelper.js'; // ── Types ────────────────────────────────────────────────────────────────────── @@ -257,29 +258,47 @@ export function registerQualityHubDefectCreate(server: McpServer): void { 'provar_qualityhub_defect_create', { title: 'Create Defects', - description: [ - 'Create Defect__c records in Quality Hub for failed test executions in a given run.', - 'Queries the run by Tracking_Id__c, finds failed Test_Execution__c records, creates a', - 'Defect__c per failure (with description, step, browser, environment, tester), and links', - 'it via Test_Case_Defect__c and 
Test_Execution_Defect__c junction records.', - 'If Jira or ADO sync is configured in Quality Hub, defects sync to those systems automatically.', - ].join(' '), + description: desc( + [ + 'Create Defect__c records in Quality Hub for failed test executions in a given run.', + 'Queries the run by Tracking_Id__c, finds failed Test_Execution__c records, creates a', + 'Defect__c per failure (with description, step, browser, environment, tester), and links', + 'it via Test_Case_Defect__c and Test_Execution_Defect__c junction records.', + 'If Jira or ADO sync is configured in Quality Hub, defects sync to those systems automatically.', + ].join(' '), + 'Create Defect__c records for failed Quality Hub test executions.' + ), inputSchema: { - run_id: z.string().describe('Test run Tracking_Id__c value returned by provar_qualityhub_testrun'), - target_org: z.string().describe('SF org alias or username for the Quality Hub org'), + run_id: z + .string() + .describe( + desc( + 'Test run Tracking_Id__c value returned by provar_qualityhub_testrun', + 'string, tracking ID from qualityhub_testrun' + ) + ), + target_org: z + .string() + .describe(desc('SF org alias or username for the Quality Hub org', 'string, SF org alias or username')), failed_tests: z .array(z.string()) .optional() .describe( - 'Optional filter — list of Test_Case__c record ID substrings to restrict defect creation to specific failures' + desc( + 'Optional filter — list of Test_Case__c record ID substrings to restrict defect creation to specific failures', + 'array of strings, optional; filter by TC ID substring' + ) ), sf_path: z .string() .optional() .describe( - 'Path to the sf CLI executable when not in PATH ' + - '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + - 'Leave unset to use auto-discovery.' + desc( + 'Path to the sf CLI executable when not in PATH ' + + '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). 
' + + 'Leave unset to use auto-discovery.', + 'string, optional; path to sf CLI executable' + ) ), }, }, diff --git a/src/mcp/tools/descHelper.ts b/src/mcp/tools/descHelper.ts new file mode 100644 index 00000000..e890e0be --- /dev/null +++ b/src/mcp/tools/descHelper.ts @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +/** + * Returns `compact` when PROVAR_MCP_SCHEMA_MODE=compact, otherwise `standard`. + * Reads the env var on each call so tests can set it without resetting module cache. + */ +export function desc(standard: string, compact: string): string { + return process.env['PROVAR_MCP_SCHEMA_MODE'] === 'compact' ? compact : standard; +} diff --git a/src/mcp/tools/nitroXTools.ts b/src/mcp/tools/nitroXTools.ts index eb073922..6f262ad6 100644 --- a/src/mcp/tools/nitroXTools.ts +++ b/src/mcp/tools/nitroXTools.ts @@ -18,6 +18,7 @@ import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId, type ValidationIssue } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { desc } from './descHelper.js'; // ── Types ───────────────────────────────────────────────────────────────────── @@ -487,29 +488,39 @@ export function registerNitroXDiscover(server: McpServer): void { 'provar_nitrox_discover', { title: 'Discover NitroX Components', - description: [ - 'Discover Provar projects containing NitroX (Hybrid Model) page objects.', - 'Scans directories for .testproject marker files, then inventories nitroX/ and nitroXPackages/ directories.', - "NitroX is Provar's Hybrid Model for locators — component-based page objects for LWC,", - 'Screen Flow, Industry Components, Experience Cloud, and HTML5 components.', - 'Results provide file paths and package 
info for use with provar_nitrox_read, validate, and generate.', - ].join(' '), + description: desc( + [ + 'Discover Provar projects containing NitroX (Hybrid Model) page objects.', + 'Scans directories for .testproject marker files, then inventories nitroX/ and nitroXPackages/ directories.', + "NitroX is Provar's Hybrid Model for locators — component-based page objects for LWC,", + 'Screen Flow, Industry Components, Experience Cloud, and HTML5 components.', + 'Results provide file paths and package info for use with provar_nitrox_read, validate, and generate.', + ].join(' '), + 'Discover Provar projects with NitroX Hybrid Model page objects.' + ), inputSchema: { search_roots: z .array(z.string()) .optional() - .describe('Directories to scan (default: cwd; if empty, falls back to ~/git and ~/Provar)'), + .describe( + desc( + 'Directories to scan (default: cwd; if empty, falls back to ~/git and ~/Provar)', + 'array of strings, optional; dirs to scan' + ) + ), max_depth: z .number() .int() .min(1) .max(20) .default(6) - .describe('Maximum directory depth for .testproject search'), + .describe(desc('Maximum directory depth for .testproject search', 'int 1–20, optional; max scan depth')), include_packages: z .boolean() .default(true) - .describe('Include nitroXPackages/ package.json metadata in results'), + .describe( + desc('Include nitroXPackages/ package.json metadata in results', 'bool, optional; include package metadata') + ), }, }, ({ search_roots, max_depth, include_packages }) => { @@ -589,24 +600,42 @@ export function registerNitroXRead(server: McpServer, config: ServerConfig): voi 'provar_nitrox_read', { title: 'Read NitroX Files', - description: [ - 'Read one or more NitroX .po.json (Hybrid Model page object) files and return their parsed content.', - 'Use this to load examples before generating or validating.', - "Provide file_paths for specific files, or project_path to read all .po.json files from a project's nitroX/ directory.", - ].join(' '), + 
description: desc( + [ + 'Read one or more NitroX .po.json (Hybrid Model page object) files and return their parsed content.', + 'Use this to load examples before generating or validating.', + "Provide file_paths for specific files, or project_path to read all .po.json files from a project's nitroX/ directory.", + ].join(' '), + 'Read NitroX .po.json files and return parsed content.' + ), inputSchema: { - file_paths: z.array(z.string()).optional().describe('Specific .po.json file paths to read'), + file_paths: z + .array(z.string()) + .optional() + .describe( + desc('Specific .po.json file paths to read', 'array of strings, optional; specific .po.json paths') + ), project_path: z .string() .optional() - .describe('Provar project path — reads all .po.json files from nitroX/ directory'), + .describe( + desc( + 'Provar project path — reads all .po.json files from nitroX/ directory', + 'string, optional; project path to read all nitroX files' + ) + ), max_files: z .number() .int() .min(1) .max(100) .default(20) - .describe('Maximum number of files to return (prevents context overflow)'), + .describe( + desc( + 'Maximum number of files to return (prevents context overflow)', + 'int 1–100, optional; max files returned' + ) + ), }, }, ({ file_paths, project_path, max_files }) => { @@ -690,17 +719,28 @@ export function registerNitroXValidate(server: McpServer, config: ServerConfig): 'provar_nitrox_validate', { title: 'Validate NitroX Component', - description: [ - 'Validate a NitroX .po.json (Hybrid Model component page object) against schema rules.', - 'Works for any NitroX-mapped component type: LWC, Screen Flow, Industry Components, Experience Cloud, HTML5.', - 'Runs two validation passes sequentially: hardcoded semantic rules (NX001–NX010) then JSON schema validation (NX_SCHEMA_* rule IDs).', - 'Schema issues catch structural errors not covered by NX rules: wrong property types, extra properties, enum violations.', - 'Returns a quality score (0–100) and a combined list 
of issues with rule IDs, severity, and suggestions.', - 'Score formula: 100 − (20 × errors) − (5 × warnings) − (1 × infos).', - ].join(' '), + description: desc( + [ + 'Validate a NitroX .po.json (Hybrid Model component page object) against schema rules.', + 'Works for any NitroX-mapped component type: LWC, Screen Flow, Industry Components, Experience Cloud, HTML5.', + 'Runs two validation passes sequentially: hardcoded semantic rules (NX001–NX010) then JSON schema validation (NX_SCHEMA_* rule IDs).', + 'Schema issues catch structural errors not covered by NX rules: wrong property types, extra properties, enum violations.', + 'Returns a quality score (0–100) and a combined list of issues with rule IDs, severity, and suggestions.', + 'Score formula: 100 − (20 × errors) − (5 × warnings) − (1 × infos).', + ].join(' '), + 'Validate a NitroX .po.json against NX001–NX010 and JSON schema rules.' + ), inputSchema: { - content: z.string().optional().describe('JSON string of the .po.json content to validate'), - file_path: z.string().optional().describe('Path to a .po.json file to validate'), + content: z + .string() + .optional() + .describe( + desc('JSON string of the .po.json content to validate', 'string, optional; JSON content to validate') + ), + file_path: z + .string() + .optional() + .describe(desc('Path to a .po.json file to validate', 'string, optional; path to .po.json file')), }, }, ({ content, file_path }) => { @@ -779,26 +819,58 @@ export function registerNitroXGenerate(server: McpServer, config: ServerConfig): 'provar_nitrox_generate', { title: 'Generate NitroX Components', - description: [ - 'Generate a new NitroX .po.json (Hybrid Model page object) from a component description.', - "Applicable to any component type supported by Provar's Hybrid Model:", - 'LWC, Screen Flow, Industry Components, Experience Cloud, HTML5.', - 'Read the provar-nitrox-component-catalog resource first to understand available component types,', - 'tagName conventions, interaction 
titles, and attribute patterns from shipped base packages.', - 'All componentId fields are assigned fresh UUIDs. Returns JSON content;', - 'writes to disk only when dry_run=false.', - ].join(' '), + description: desc( + [ + 'Generate a new NitroX .po.json (Hybrid Model page object) from a component description.', + "Applicable to any component type supported by Provar's Hybrid Model:", + 'LWC, Screen Flow, Industry Components, Experience Cloud, HTML5.', + 'Read the provar-nitrox-component-catalog resource first to understand available component types,', + 'tagName conventions, interaction titles, and attribute patterns from shipped base packages.', + 'All componentId fields are assigned fresh UUIDs. Returns JSON content;', + 'writes to disk only when dry_run=false.', + ].join(' '), + 'Generate a NitroX .po.json Hybrid Model page object with fresh UUIDs.' + ), inputSchema: { - name: z.string().describe('Path-like component name, e.g. /com/force/myapp/ButtonComponent'), - tag_name: z.string().describe('LWC or HTML tag name, e.g. lightning-button or c-my-component'), - type: z.enum(['Block', 'Page']).default('Block').describe('Component type'), - page_structure_element: z.boolean().default(true).describe('Whether this is a page structure element'), - field_details_element: z.boolean().default(false).describe('Whether this is a field details element'), - parameters: z.array(ParameterInputSchema).optional().describe('Component parameters/qualifiers'), - elements: z.array(ElementInputSchema).optional().describe('Child elements'), - output_path: z.string().optional().describe('File path to write (requires dry_run=false)'), - overwrite: z.boolean().default(false).describe('Overwrite if output_path already exists'), - dry_run: z.boolean().default(true).describe('Return JSON without writing to disk (default)'), + name: z + .string() + .describe( + desc('Path-like component name, e.g. 
/com/force/myapp/ButtonComponent', 'string, path-like component name') + ), + tag_name: z + .string() + .describe( + desc('LWC or HTML tag name, e.g. lightning-button or c-my-component', 'string, LWC or HTML tag name') + ), + type: z.enum(['Block', 'Page']).default('Block').describe(desc('Component type', 'enum Block|Page')), + page_structure_element: z + .boolean() + .default(true) + .describe(desc('Whether this is a page structure element', 'bool, optional; default true')), + field_details_element: z + .boolean() + .default(false) + .describe(desc('Whether this is a field details element', 'bool, optional; default false')), + parameters: z + .array(ParameterInputSchema) + .optional() + .describe(desc('Component parameters/qualifiers', 'array, optional; component parameters')), + elements: z + .array(ElementInputSchema) + .optional() + .describe(desc('Child elements', 'array, optional; child elements')), + output_path: z + .string() + .optional() + .describe(desc('File path to write (requires dry_run=false)', 'string, optional; output file path')), + overwrite: z + .boolean() + .default(false) + .describe(desc('Overwrite if output_path already exists', 'bool, optional; overwrite if exists')), + dry_run: z + .boolean() + .default(true) + .describe(desc('Return JSON without writing to disk (default)', 'bool, optional; default true, skip write')), }, }, (input) => { @@ -861,22 +933,42 @@ export function registerNitroXPatch(server: McpServer, config: ServerConfig): vo 'provar_nitrox_patch', { title: 'Patch NitroX Component', - description: [ - 'Apply a JSON merge-patch (RFC 7396) to an existing NitroX .po.json file.', - 'Reads the file, merges the patch (null values remove keys, other values replace or recurse into objects),', - 'optionally validates the merged result, and writes back.', - 'Use dry_run=true (default) to preview the merged output without writing.', - ].join(' '), + description: desc( + [ + 'Apply a JSON merge-patch (RFC 7396) to an existing NitroX 
.po.json file.', + 'Reads the file, merges the patch (null values remove keys, other values replace or recurse into objects),', + 'optionally validates the merged result, and writes back.', + 'Use dry_run=true (default) to preview the merged output without writing.', + ].join(' '), + 'Apply a JSON merge-patch (RFC 7396) to an existing NitroX .po.json file.' + ), inputSchema: { - file_path: z.string().describe('Path to the existing .po.json file to patch'), + file_path: z + .string() + .describe(desc('Path to the existing .po.json file to patch', 'string, absolute path to .po.json file')), patch: z .record(z.unknown()) - .describe('JSON merge-patch to apply (RFC 7396: null removes key, any other value replaces)'), - dry_run: z.boolean().default(true).describe('Return merged result without writing to disk (default)'), + .describe( + desc( + 'JSON merge-patch to apply (RFC 7396: null removes key, any other value replaces)', + 'object, RFC 7396 merge-patch' + ) + ), + dry_run: z + .boolean() + .default(true) + .describe( + desc('Return merged result without writing to disk (default)', 'bool, optional; default true, skip write') + ), validate_after: z .boolean() .default(true) - .describe('Run NX validation on merged result; blocks write if errors found'), + .describe( + desc( + 'Run NX validation on merged result; blocks write if errors found', + 'bool, optional; default true, validate before write' + ) + ), }, }, ({ file_path, patch, dry_run, validate_after }) => { diff --git a/src/mcp/tools/pageObjectGenerate.ts b/src/mcp/tools/pageObjectGenerate.ts index 4e9fd960..3735a4e3 100644 --- a/src/mcp/tools/pageObjectGenerate.ts +++ b/src/mcp/tools/pageObjectGenerate.ts @@ -14,6 +14,7 @@ import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { desc } from './descHelper.js'; const 
VALID_LOCATOR_STRATEGIES = [ 'xpath', @@ -105,48 +106,97 @@ export function registerPageObjectGenerate(server: McpServer, config: ServerConf 'provar_pageobject_generate', { title: 'Generate Page Object', - description: [ - 'Generate a Provar Java Page Object skeleton with @Page/@SalesforcePage annotation, standard imports, and @FindBy WebElement fields.', - 'Returns Java source. Writes to disk only when dry_run=false.', - 'SSO support: set sso_class to also generate an ILoginPage implementation stub for non-SF SSO pages.', - 'Example: sso_class="LoginPageSso" generates a LoginPageSso.java that implements ILoginPage with loginAs() and logout() stubs.', - 'The ILoginPage stub is written to the same directory as output_path when dry_run=false.', - ].join(' '), + description: desc( + [ + 'Generate a Provar Java Page Object skeleton with @Page/@SalesforcePage annotation, standard imports, and @FindBy WebElement fields.', + 'Returns Java source. Writes to disk only when dry_run=false.', + 'SSO support: set sso_class to also generate an ILoginPage implementation stub for non-SF SSO pages.', + 'Example: sso_class="LoginPageSso" generates a LoginPageSso.java that implements ILoginPage with loginAs() and logout() stubs.', + 'The ILoginPage stub is written to the same directory as output_path when dry_run=false.', + ].join(' '), + 'Generate a Provar Java Page Object skeleton with @Page/@FindBy fields.' + ), inputSchema: { - class_name: z.string().describe('PascalCase class name, e.g. AccountDetailPage'), + class_name: z + .string() + .describe(desc('PascalCase class name, e.g. AccountDetailPage', 'string, PascalCase class name')), package_name: z .string() .default('pageobjects') - .describe('Java package, e.g. pageobjects or pageobjects.accounts'), + .describe( + desc('Java package, e.g. 
pageobjects or pageobjects.accounts', 'string, optional; Java package name') + ), page_type: z .enum(['standard', 'salesforce']) .default('standard') - .describe('@Page (standard) or @SalesforcePage (salesforce)'), - title: z.string().optional().describe('Page title attribute; defaults to class_name if omitted'), + .describe(desc('@Page (standard) or @SalesforcePage (salesforce)', 'enum standard|salesforce')), + title: z + .string() + .optional() + .describe( + desc('Page title attribute; defaults to class_name if omitted', 'string, optional; page title attribute') + ), connection_name: z .string() .optional() - .describe('Salesforce connection name (required when page_type=salesforce)'), + .describe( + desc( + 'Salesforce connection name (required when page_type=salesforce)', + 'string, optional; SF connection name' + ) + ), salesforce_page_attribute: z .enum(['page', 'auraComponent', 'object', 'lightningWebComponent']) .optional() - .describe('Page type attribute for @SalesforcePage'), - fields: z.array(FieldSchema).default([]).describe('WebElement fields to generate'), + .describe( + desc('Page type attribute for @SalesforcePage', 'enum page|auraComponent|object|lightningWebComponent') + ), + fields: z + .array(FieldSchema) + .default([]) + .describe(desc('WebElement fields to generate', 'array, optional; WebElement fields')), sso_class: z .string() .optional() .describe( - 'PascalCase class name for an ILoginPage implementation stub (non-SF SSO pages). ' + - 'When provided, an additional Java class implementing ILoginPage is generated alongside the page object. ' + - 'Example: "LoginPageSso" → LoginPageSso.java with loginAs() and logout() method stubs.' + desc( + 'PascalCase class name for an ILoginPage implementation stub (non-SF SSO pages). ' + + 'When provided, an additional Java class implementing ILoginPage is generated alongside the page object. 
' + + 'Example: "LoginPageSso" → LoginPageSso.java with loginAs() and logout() method stubs.', + 'string, optional; PascalCase class name for ILoginPage SSO stub' + ) ), - output_path: z.string().optional().describe('Suggested file path for the .java file (returned in response)'), - overwrite: z.boolean().default(false).describe('Overwrite existing file when dry_run=false'), + output_path: z + .string() + .optional() + .describe( + desc( + 'Suggested file path for the .java file (returned in response)', + 'string, optional; output .java file path' + ) + ), + overwrite: z + .boolean() + .default(false) + .describe(desc('Overwrite existing file when dry_run=false', 'bool, optional; overwrite if exists')), dry_run: z .boolean() .default(true) - .describe('true = return source only (default); false = write to output_path'), - idempotency_key: z.string().optional().describe('Caller-provided key echoed back for deduplication tracking'), + .describe( + desc( + 'true = return source only (default); false = write to output_path', + 'bool, optional; default true, skip write' + ) + ), + idempotency_key: z + .string() + .optional() + .describe( + desc( + 'Caller-provided key echoed back for deduplication tracking', + 'string, optional; deduplication key echoed in response' + ) + ), }, }, (input) => { diff --git a/src/mcp/tools/pageObjectValidate.ts b/src/mcp/tools/pageObjectValidate.ts index f34891bf..27fc8d0e 100644 --- a/src/mcp/tools/pageObjectValidate.ts +++ b/src/mcp/tools/pageObjectValidate.ts @@ -15,21 +15,35 @@ import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId, type ValidationIssue } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { desc } from './descHelper.js'; export function registerPageObjectValidate(server: McpServer, config: ServerConfig): void { server.registerTool( 'provar_pageobject_validate', { title: 'Validate 
Page Object', - description: + description: desc( 'Validate a Provar Java Page Object against naming conventions, locator best practices, and structural requirements. Returns quality score (0–100) and list of issues.', + 'Validate a Provar Java Page Object for naming, locators, and structure.' + ), inputSchema: { - content: z.string().optional().describe('Java source code to validate directly'), - file_path: z.string().optional().describe('Path to .java Page Object file'), + content: z + .string() + .optional() + .describe(desc('Java source code to validate directly', 'string, optional; Java source to validate')), + file_path: z + .string() + .optional() + .describe(desc('Path to .java Page Object file', 'string, optional; path to .java file')), expected_class_name: z .string() .optional() - .describe('Expected class name for PO_006 check; inferred from file_path when omitted'), + .describe( + desc( + 'Expected class name for PO_006 check; inferred from file_path when omitted', + 'string, optional; expected class name for PO_006 check' + ) + ), }, }, ({ content, file_path, expected_class_name }) => { diff --git a/src/mcp/tools/projectInspect.ts b/src/mcp/tools/projectInspect.ts index f0690603..b26e8c14 100644 --- a/src/mcp/tools/projectInspect.ts +++ b/src/mcp/tools/projectInspect.ts @@ -14,29 +14,66 @@ import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; +import { maskFields, parseFieldsParam } from '../utils/fieldMask.js'; +import { desc } from './descHelper.js'; + +const INSPECT_SUMMARY_FIELDS = ['requestId', 'project_path', 'provar_home', 'summary']; export function registerProjectInspect(server: McpServer, config: ServerConfig): void { server.registerTool( 'provar_project_inspect', { title: 'Inspect 
Project', - description: [ - 'Inspect a Provar project folder and return a structured inventory.', - 'Returns: provardx-properties.json config files (for ProvarDX CLI runs),', - 'ANT build files (build.xml etc in ANT/ dirs, for CLI/pipeline runs),', - 'source page object directories with Java file counts (src/pageobjects — compiled bin/ dirs excluded),', - '.testcase files found recursively under tests/,', - 'count of custom test step files in src/customapis/,', - 'count of data source files (CSV/XLSX/JSON) in data/ and templates/ dirs,', - 'test plan coverage showing which test cases are covered vs uncovered,', - 'and connection + environment overview parsed from the .testproject file', - '(Salesforce, UI Testing, Web Services, Quality Hub, Database, and other connection types).', - ].join(' '), + description: desc( + [ + 'Inspect a Provar project folder and return a structured inventory.', + 'Returns: provardx-properties.json config files (for ProvarDX CLI runs),', + 'ANT build files (build.xml etc in ANT/ dirs, for CLI/pipeline runs),', + 'source page object directories with Java file counts (src/pageobjects — compiled bin/ dirs excluded),', + '.testcase files found recursively under tests/,', + 'count of custom test step files in src/customapis/,', + 'count of data source files (CSV/XLSX/JSON) in data/ and templates/ dirs,', + 'test plan coverage showing which test cases are covered vs uncovered,', + 'and connection + environment overview parsed from the .testproject file', + '(Salesforce, UI Testing, Web Services, Quality Hub, Database, and other connection types).', + ].join(' '), + 'Inspect a Provar project and return inventory of files, plans, and connections.' 
+ ), inputSchema: { - project_path: z.string().describe('Absolute or relative path to the Provar project root directory'), + project_path: z + .string() + .describe( + desc( + 'Absolute or relative path to the Provar project root directory', + 'string, absolute path to project root' + ) + ), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + desc( + 'Response verbosity: "summary" returns only requestId, project_path, provar_home, and the summary object; ' + + '"standard" (default) returns the full inventory; "full" is identical to standard for this tool.', + 'enum summary|standard|full, optional; default standard' + ) + ), + fields: z + .string() + .optional() + .describe( + desc( + 'Comma-separated list of top-level keys to retain (e.g. "test_case_files,summary"). ' + + 'Supports dot notation for nested filtering (e.g. "test_project.connections"). ' + + 'Unknown field names are silently ignored. Applied after the detail filter.', + 'string, optional; comma-separated keys to keep (supports dot notation)' + ) + ), }, }, - ({ project_path }) => { + ({ project_path, detail, fields }) => { const requestId = makeRequestId(); log('info', 'provar_project_inspect', { requestId, project_path }); @@ -49,7 +86,18 @@ export function registerProjectInspect(server: McpServer, config: ServerConfig): return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(err) }] }; } - const result = buildProjectInventory(resolved, requestId); + let result = buildProjectInventory(resolved, requestId); + + const detailLevel = (detail ?? 
'standard') as DetailLevel; + if (detailLevel !== 'standard') { + result = applyDetailLevel(result, detailLevel, INSPECT_SUMMARY_FIELDS); + } + + const fieldList = parseFieldsParam(fields); + if (fieldList) { + result = maskFields(result, fieldList) as typeof result; + } + return { content: [{ type: 'text' as const, text: JSON.stringify(result) }], structuredContent: result, diff --git a/src/mcp/tools/projectValidateFromPath.ts b/src/mcp/tools/projectValidateFromPath.ts index 4c8f8d5c..a27c160a 100644 --- a/src/mcp/tools/projectValidateFromPath.ts +++ b/src/mcp/tools/projectValidateFromPath.ts @@ -6,6 +6,7 @@ */ /* eslint-disable camelcase */ +import path from 'node:path'; import { z } from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import type { ServerConfig } from '../server.js'; @@ -14,6 +15,18 @@ import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; import { validateProjectFromPath, ProjectValidationError } from '../../services/projectValidation.js'; import type { ProjectValidationResult, ValidatedPlan } from '../../services/projectValidation.js'; +import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; +import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js'; +import { + generateRunId, + saveRun, + hasAnyRun, + loadBaselineViolations, + computeDiff, + computeContextHash, + type DiffableViolation, +} from '../utils/validationDiff.js'; +import { desc } from './descHelper.js'; // ── Response shaping ────────────────────────────────────────────────────────── @@ -31,6 +44,30 @@ interface ViolationSummary { sample_message: string; } +function countAllProjectViolations(result: ProjectValidationResult): number { + // Note: ValidatedTestCase does not carry best_practices_violations at the project + // layer (intentional — bp surfaces via the testcase tool). 
The count here covers + // every violation visible at project/plan/suite/child_suite level plus per-tc + // structural issues, which is what the stop-decision safety hedge needs. + let total = result.project_violations.length; + for (const plan of result.plans) { + total += plan.violations.length; + for (const suite of plan.suites) { + total += suite.violations.length; + for (const tc of suite.test_cases) { + total += tc.issues.length; + } + for (const cs of suite.child_suites) { + total += cs.violations.length; + } + } + for (const utc of plan.unplanned_test_cases) { + total += utc.issues.length; + } + } + return total; +} + function buildPlanSummary(plan: ValidatedPlan): PlanSummary { const test_case_count = plan.suites.reduce((n, s) => n + s.test_cases.length, 0) + plan.unplanned_test_cases.length; return { @@ -103,57 +140,105 @@ function shapeResponse( }; } +// ── Helpers ─────────────────────────────────────────────────────────────────── + +function classifyError(err: Error & { code?: string }): { code: string; isUserError: boolean } { + if (err instanceof PathPolicyError || err instanceof ProjectValidationError) { + return { code: err.code, isUserError: true }; + } + return { code: err.code ?? 
'VALIDATE_ERROR', isUserError: false }; +} + // ── Tool registration ───────────────────────────────────────────────────────── +const PROJECT_VALIDATE_SUMMARY_FIELDS = [ + 'requestId', + 'project_path', + 'project_name', + 'quality_score', + 'quality_tier', + 'saved_to', + 'run_id', + 'completeness_score', + 'recommended_next_action', +]; + export function registerProjectValidateFromPath(server: McpServer, config: ServerConfig): void { server.registerTool( 'provar_project_validate', { title: 'Validate Project', - description: [ - 'Validate a Provar project directly from its directory on disk.', - 'Reads the plan/suite/testinstance hierarchy from the plans/ directory,', - 'resolves test case XML from the tests/ directory, extracts project context', - '(connections, environments, secrets) from the .testproject file, then runs', - 'the full validation rule set.', - 'Returns a compact quality score, violation summary, and per-plan/suite scores.', - 'By default returns a slim summary response to avoid token explosion.', - 'Pass include_plan_details:true to get full per-suite and per-test-case data.', - 'By default saves a QH-compatible JSON report to', - '{project_path}/provardx/validation/ (created if absent).', - 'Plan integrity: if any plan or suite directory is missing a .planitem file, the response includes a plan_integrity_warnings array.', - 'Test instances in those directories are silently ignored by the Provar runner — fix these before running tests.', - 'IMPORTANT: Use this tool for whole-project validation —', - 'DO NOT read individual test case files and pass XML content inline.', - 'Pass a project_path and let this tool handle all file reading.', - ].join(' '), + description: desc( + [ + 'Validate a Provar project directly from its directory on disk.', + 'Reads the plan/suite/testinstance hierarchy from the plans/ directory,', + 'resolves test case XML from the tests/ directory, extracts project context', + '(connections, environments, secrets) from the 
.testproject file, then runs', + 'the full validation rule set.', + 'Returns a compact quality score, violation summary, and per-plan/suite scores.', + 'By default returns a slim summary response to avoid token explosion.', + 'Pass include_plan_details:true or detail:full to get full per-suite and per-test-case data.', + 'By default saves a QH-compatible JSON report to', + '{project_path}/provardx/validation/ (created if absent).', + 'Plan integrity: if any plan or suite directory is missing a .planitem file, the response includes a plan_integrity_warnings array.', + 'Test instances in those directories are silently ignored by the Provar runner — fix these before running tests.', + 'Every response includes run_id — pass it as baseline_run_id in the next call to receive only new/resolved violations.', + 'IMPORTANT: Use this tool for whole-project validation —', + 'DO NOT read individual test case files and pass XML content inline.', + 'Pass a project_path and let this tool handle all file reading.', + ].join(' '), + 'Validate a Provar project from disk; quality score, violation summary, run_id for diff.' 
+ ), inputSchema: { project_path: z .string() - .describe('Absolute path to the Provar project root (the directory containing the .testproject file)'), + .describe( + desc( + 'Absolute path to the Provar project root (the directory containing the .testproject file)', + 'string, absolute path to project root' + ) + ), quality_threshold: z .number() .min(0) .max(100) .optional() .default(80) - .describe('Minimum quality score for a test case to be considered valid (default: 80)'), + .describe( + desc( + 'Minimum quality score for a test case to be considered valid (default: 80)', + 'number 0–100, optional; minimum quality score threshold' + ) + ), save_results: z .boolean() .optional() .default(true) - .describe('Write a QH-compatible JSON report to provardx/validation/ (default: true)'), + .describe( + desc( + 'Write a QH-compatible JSON report to provardx/validation/ (default: true)', + 'bool, optional; default true, write report to disk' + ) + ), results_dir: z .string() .optional() - .describe('Override the output directory for the saved report (default: {project_path}/provardx/validation)'), + .describe( + desc( + 'Override the output directory for the saved report (default: {project_path}/provardx/validation)', + 'string, optional; override report output dir' + ) + ), include_plan_details: z .boolean() .optional() .default(false) .describe( - 'When true, include full per-suite and per-test-case violation data in the response. ' + - 'Default false to keep response small. Use only when you need to inspect specific test case failures.' + desc( + '@deprecated — use detail="full" instead. When true, include full per-suite and per-test-case violation data in the response. 
' + + 'Default false to keep response small.', + 'bool, optional, @deprecated; use detail="full" instead' + ) ), max_uncovered: z .number() @@ -162,7 +247,10 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve .optional() .default(20) .describe( - 'Maximum number of uncovered test case paths to include in the response (default: 20). Set to 0 for none, or a large number for all.' + desc( + '@deprecated — no replacement; response is automatically scoped by detail level. Maximum number of uncovered test case paths to include in the response (default: 20).', + 'int ≥0, optional, @deprecated; auto-scoped by detail' + ) ), max_violations: z .number() @@ -171,7 +259,23 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve .optional() .default(50) .describe( - 'When include_plan_details:true, caps project_violations returned (default: 50). Ignored in slim mode where violations are grouped by rule_id instead.' + desc( + '@deprecated — no replacement; response is automatically scoped by detail level. When include_plan_details:true, caps project_violations returned (default: 50).', + 'int ≥0, optional, @deprecated; auto-scoped by detail' + ) + ), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + 'Response verbosity. "summary": key scores and stop signal only. "standard": slim violation summary (default). "full": full per-suite and per-test-case data (implies include_plan_details:true).' + ), + baseline_run_id: z + .string() + .optional() + .describe( + 'run_id from a previous call. When provided, returns only project-level violations that are new or resolved since that run: { added, resolved, unchanged_count, run_id }. If not found, returns error BASELINE_NOT_FOUND.' 
), }, }, @@ -183,6 +287,8 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve include_plan_details, max_uncovered, max_violations, + detail, + baseline_run_id, }) => { const requestId = makeRequestId(); log('info', 'provar_project_validate', { requestId, project_path, include_plan_details }); @@ -191,6 +297,10 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve assertPathAllowed(project_path, config.allowedPaths); if (results_dir) assertPathAllowed(results_dir, config.allowedPaths); + const storageDir = results_dir ?? path.join(project_path, 'provardx', 'validation'); + const contextHash = computeContextHash('project', project_path); + const runId = generateRunId(project_path); + const result = validateProjectFromPath({ project_path, quality_threshold, @@ -202,22 +312,87 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve log('warn', 'provar_project_validate: could not save results', { requestId, error: result.save_error }); } - const shaped = shapeResponse(result, include_plan_details, max_uncovered, max_violations); - const response = { requestId, ...shaped }; + const currentViolations = result.project_violations as unknown as DiffableViolation[]; + const allLevelViolationCount = countAllProjectViolations(result); + + // Read baseline and history regardless of save_results — save_results controls + // whether the CURRENT run is persisted, not whether existing runs can be read. + // Load baseline BEFORE saving to prevent eviction of the requested baseline. + const baseline = + baseline_run_id !== undefined && baseline_run_id !== '' + ? 
loadBaselineViolations(storageDir, baseline_run_id, contextHash) + : null; + + const hasBaseline = hasAnyRun(storageDir); + + if (save_results !== false) { + try { + saveRun(storageDir, runId, currentViolations, contextHash); + } catch (saveErr) { + log('warn', 'provar_project_validate: could not save run for diff', { + requestId, + error: (saveErr as Error).message, + }); + } + } + + // Diff mode + if (baseline_run_id !== undefined && baseline_run_id !== '') { + if (!baseline) { + const errResult = makeError( + 'BASELINE_NOT_FOUND', + 'Baseline run not found. Run validation without baseline_run_id first to establish a baseline.', + requestId, + false, + { suggestion: 'Run provar_project_validate without baseline_run_id first to establish a baseline.' } + ); + return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(errResult) }] }; + } + const diff = computeDiff(baseline, currentViolations); + const completeness_score = calcCompletenessScore( + result.summary.test_cases_valid, + result.summary.total_test_cases + ); + const recommended_next_action = calcNextAction(completeness_score, true, allLevelViolationCount); + const diffResponse = { + requestId, + ...(save_results !== false ? { run_id: runId } : {}), + ...diff, + completeness_score, + recommended_next_action, + }; + return { + content: [{ type: 'text' as const, text: JSON.stringify(diffResponse) }], + structuredContent: diffResponse, + }; + } + + const completeness_score = calcCompletenessScore( + result.summary.test_cases_valid, + result.summary.total_test_cases + ); + const recommended_next_action = calcNextAction(completeness_score, hasBaseline, allLevelViolationCount); + + const usePlanDetails = include_plan_details || detail === 'full'; + const shaped = shapeResponse(result, usePlanDetails, max_uncovered, max_violations); + const response = { + requestId, + ...(save_results !== false ? 
{ run_id: runId } : {}), + completeness_score, + recommended_next_action, + ...shaped, + }; + + const detailLevel = (detail ?? 'standard') as DetailLevel; + const finalResponse = applyDetailLevel(response, detailLevel, PROJECT_VALIDATE_SUMMARY_FIELDS); return { - content: [{ type: 'text' as const, text: JSON.stringify(response) }], - structuredContent: response, + content: [{ type: 'text' as const, text: JSON.stringify(finalResponse) }], + structuredContent: finalResponse, }; } catch (err: unknown) { const error = err as Error & { code?: string }; - const code = - error instanceof PathPolicyError - ? error.code - : error instanceof ProjectValidationError - ? error.code - : error.code ?? 'VALIDATE_ERROR'; - const isUserError = error instanceof PathPolicyError || error instanceof ProjectValidationError; + const { code, isUserError } = classifyError(error); const errResult = makeError(code, error.message, requestId, !isUserError); log('error', 'provar_project_validate failed', { requestId, error: error.message }); return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(errResult) }] }; diff --git a/src/mcp/tools/propertiesTools.ts b/src/mcp/tools/propertiesTools.ts index 3395dcd8..f7b558df 100644 --- a/src/mcp/tools/propertiesTools.ts +++ b/src/mcp/tools/propertiesTools.ts @@ -16,6 +16,7 @@ import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { desc } from './descHelper.js'; // ── Validation helpers ──────────────────────────────────────────────────────── @@ -163,23 +164,50 @@ export function registerPropertiesGenerate(server: McpServer, config: ServerConf 'provar_properties_generate', { title: 'Generate ProvarDX Properties File', - description: [ - 'Generate a provardx-properties.json file from the standard template.', - 'Optionally pre-fills 
projectPath and provarHome if provided.', - 'The generated file uses ${PLACEHOLDER} values that must be replaced before running tests.', - 'Use provar_properties_set afterwards to update specific fields.', - ].join(' '), + description: desc( + [ + 'Generate a provardx-properties.json file from the standard template.', + 'Optionally pre-fills projectPath and provarHome if provided.', + 'The generated file uses ${PLACEHOLDER} values that must be replaced before running tests.', + 'Use provar_properties_set afterwards to update specific fields.', + ].join(' '), + 'Generate a provardx-properties.json from the standard template.' + ), inputSchema: { - output_path: z.string().describe('Where to write the file (e.g. /path/to/project/provardx-properties.json)'), - project_path: z.string().optional().describe('Pre-fill the projectPath field with this value'), - provar_home: z.string().optional().describe('Pre-fill the provarHome field with this value'), - results_path: z.string().optional().describe('Pre-fill the resultsPath field with this value'), + output_path: z + .string() + .describe( + desc( + 'Where to write the file (e.g. 
/path/to/project/provardx-properties.json)', + 'string, absolute path for output .json file' + ) + ), + project_path: z + .string() + .optional() + .describe(desc('Pre-fill the projectPath field with this value', 'string, optional; pre-fill projectPath')), + provar_home: z + .string() + .optional() + .describe(desc('Pre-fill the provarHome field with this value', 'string, optional; pre-fill provarHome')), + results_path: z + .string() + .optional() + .describe(desc('Pre-fill the resultsPath field with this value', 'string, optional; pre-fill resultsPath')), overwrite: z .boolean() .optional() .default(false) - .describe('Overwrite the file if it already exists (default: false)'), - dry_run: z.boolean().optional().default(false).describe('Return the content without writing (default: false)'), + .describe( + desc('Overwrite the file if it already exists (default: false)', 'bool, optional; overwrite if exists') + ), + dry_run: z + .boolean() + .optional() + .default(false) + .describe( + desc('Return the content without writing (default: false)', 'bool, optional; default false, skip write') + ), }, }, ({ output_path, project_path, provar_home, results_path, overwrite, dry_run }) => { @@ -326,10 +354,16 @@ export function registerPropertiesRead(server: McpServer, config: ServerConfig): 'provar_properties_read', { title: 'Read Properties File', - description: + description: desc( 'Read and parse a provardx-properties.json file. Returns the parsed content so you can inspect current settings before making changes with provar_properties_set.', + 'Read and parse a provardx-properties.json file.' 
+ ), inputSchema: { - file_path: z.string().describe('Path to the provardx-properties.json file'), + file_path: z + .string() + .describe( + desc('Path to the provardx-properties.json file', 'string, absolute path to provardx-properties.json') + ), }, }, ({ file_path }) => { @@ -483,15 +517,25 @@ export function registerPropertiesSet(server: McpServer, config: ServerConfig): 'provar_properties_set', { title: 'Set Property Value', - description: [ - 'Update one or more fields in a provardx-properties.json file.', - 'Only the provided fields are changed — all other fields are preserved.', - 'Object fields (environment, metadata) are deep-merged.', - 'Array fields (testCase, testPlan, connectionOverride) replace the existing value entirely.', - 'Use provar_properties_read first to inspect the current state.', - ].join(' '), + description: desc( + [ + 'Update one or more fields in a provardx-properties.json file.', + 'Only the provided fields are changed — all other fields are preserved.', + 'Object fields (environment, metadata) are deep-merged.', + 'Array fields (testCase, testPlan, connectionOverride) replace the existing value entirely.', + 'Use provar_properties_read first to inspect the current state.', + ].join(' '), + 'Update fields in a provardx-properties.json; other fields preserved.' 
+ ), inputSchema: { - file_path: z.string().describe('Path to the provardx-properties.json file to update'), + file_path: z + .string() + .describe( + desc( + 'Path to the provardx-properties.json file to update', + 'string, absolute path to provardx-properties.json' + ) + ), updates: updatesSchema, }, }, @@ -576,14 +620,33 @@ export function registerPropertiesValidate(server: McpServer, config: ServerConf 'provar_properties_validate', { title: 'Validate ProvarDX Properties File', - description: [ - 'Validate a provardx-properties.json file against the ProvarDX schema.', - 'Checks required fields, valid enum values, and warns about unfilled placeholder values.', - 'Accepts either a file path or inline JSON content.', - ].join(' '), + description: desc( + [ + 'Validate a provardx-properties.json file against the ProvarDX schema.', + 'Checks required fields, valid enum values, and warns about unfilled placeholder values.', + 'Accepts either a file path or inline JSON content.', + ].join(' '), + 'Validate a provardx-properties.json against required fields and enum values.' 
+ ), inputSchema: { - file_path: z.string().optional().describe('Path to the provardx-properties.json file to validate'), - content: z.string().optional().describe('Inline JSON string to validate (alternative to file_path)'), + file_path: z + .string() + .optional() + .describe( + desc( + 'Path to the provardx-properties.json file to validate', + 'string, optional; path to provardx-properties.json' + ) + ), + content: z + .string() + .optional() + .describe( + desc( + 'Inline JSON string to validate (alternative to file_path)', + 'string, optional; inline JSON to validate' + ) + ), }, }, ({ file_path, content }) => { diff --git a/src/mcp/tools/qualityHubApiTools.ts b/src/mcp/tools/qualityHubApiTools.ts index 0a76553d..1fc3d8e9 100644 --- a/src/mcp/tools/qualityHubApiTools.ts +++ b/src/mcp/tools/qualityHubApiTools.ts @@ -18,6 +18,7 @@ import { QualityHubRateLimitError, REQUEST_ACCESS_URL, } from '../../services/qualityHub/client.js'; +import { desc } from './descHelper.js'; const CORPUS_FALLBACK_HINT = 'Fallback: read the provar://docs/step-reference MCP resource for step types and attribute formats, then continue.'; @@ -49,27 +50,33 @@ export function registerCorpusExamplesRetrieve(server: McpServer): void { 'provar_qualityhub_examples_retrieve', { title: 'Retrieve Corpus Examples', - description: [ - 'Retrieve N similar Provar test case examples from the Quality Hub corpus (1000+ tests in Bedrock KB).', - 'Use this BEFORE writing any Provar .testcase XML — whether via provar_testcase_generate, Write, or Edit.', - 'Pass a user story, requirement, source test file content, or step type keywords as the query.', - 'Returns up to N example Provar XML test cases ordered by similarity score.', - 'If retrieval fails (no auth, network error, rate limit), returns empty examples with a warning — the', - 'generation workflow can still continue without grounding. 
Never hard-errors on API failure.', - '', - 'For org-specific field metadata: first call getObjectSchema from the Salesforce Hosted MCP', - '(platform/sobject-reads — https://api.salesforce.com/platform/mcp/v1/platform/sobject-reads),', - 'then include key field names in your query (e.g. "Opportunity: CloseDate, Amount, StageName").', - '', - 'Requires a Provar API key (sf provar auth login). Without a key, returns empty examples with onboarding instructions.', - ].join('\n'), + description: desc( + [ + 'Retrieve N similar Provar test case examples from the Quality Hub corpus (1000+ tests in Bedrock KB).', + 'Use this BEFORE writing any Provar .testcase XML — whether via provar_testcase_generate, Write, or Edit.', + 'Pass a user story, requirement, source test file content, or step type keywords as the query.', + 'Returns up to N example Provar XML test cases ordered by similarity score.', + 'If retrieval fails (no auth, network error, rate limit), returns empty examples with a warning — the', + 'generation workflow can still continue without grounding. Never hard-errors on API failure.', + '', + 'For org-specific field metadata: first call getObjectSchema from the Salesforce Hosted MCP', + '(platform/sobject-reads — https://api.salesforce.com/platform/mcp/v1/platform/sobject-reads),', + 'then include key field names in your query (e.g. "Opportunity: CloseDate, Amount, StageName").', + '', + 'Requires a Provar API key (sf provar auth login). Without a key, returns empty examples with onboarding instructions.', + ].join('\n'), + 'Retrieve similar Provar test case examples from the Quality Hub corpus.' + ), inputSchema: { query: z .string() .describe( - 'Text to search against the corpus — a user story, requirement description, or source test file content. ' + - 'Longer is better: include Salesforce object names, field names, and action descriptions. ' + - 'Truncated server-side at 2000 characters.' 
+ desc( + 'Text to search against the corpus — a user story, requirement description, or source test file content. ' + + 'Longer is better: include Salesforce object names, field names, and action descriptions. ' + + 'Truncated server-side at 2000 characters.', + 'string, user story or requirement text; include SF object/field names' + ) ), n: z .number() @@ -78,18 +85,26 @@ export function registerCorpusExamplesRetrieve(server: McpServer): void { .max(10) .optional() .default(5) - .describe('Number of examples to return. Default 5, max 10.'), + .describe(desc('Number of examples to return. Default 5, max 10.', 'int 1–10, optional; examples to return')), app_filter: z .string() .optional() .describe( - 'Optional Salesforce cloud filter to bias results (e.g. "SalesCloud", "ServiceCloud", "HealthCloud").' + desc( + 'Optional Salesforce cloud filter to bias results (e.g. "SalesCloud", "ServiceCloud", "HealthCloud").', + 'string, optional; SF cloud filter e.g. SalesCloud' + ) ), prefer_high_quality: z .boolean() .optional() .default(true) - .describe('When true (default), favours tier4/tier3 corpus examples. Set false to include all tiers.'), + .describe( + desc( + 'When true (default), favours tier4/tier3 corpus examples. 
Set false to include all tiers.', + 'bool, optional; default true, prefer high-quality corpus examples' + ) + ), }, }, async ({ query, n, app_filter, prefer_high_quality }) => { diff --git a/src/mcp/tools/qualityHubTools.ts b/src/mcp/tools/qualityHubTools.ts index aa464af6..ca5921ea 100644 --- a/src/mcp/tools/qualityHubTools.ts +++ b/src/mcp/tools/qualityHubTools.ts @@ -10,7 +10,10 @@ import { z } from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; +import { maskFields, parseFieldsParam } from '../utils/fieldMask.js'; import { runSfCommand } from './sfSpawn.js'; +import { desc } from './descHelper.js'; function handleSpawnError( err: unknown, @@ -30,6 +33,8 @@ function handleSpawnError( }; } +const QH_SUMMARY_FIELDS = ['requestId', 'exitCode']; + // ── Tool: provar_qualityhub_connect ─────────────────────────────────────────── export function registerQualityHubConnect(server: McpServer): void { @@ -37,22 +42,31 @@ export function registerQualityHubConnect(server: McpServer): void { 'provar_qualityhub_connect', { title: 'Connect to Quality Hub', - description: + description: desc( 'Connect to a Provar Quality Hub org. Invokes `sf provar quality-hub connect` with the supplied flags.', + 'Connect to a Provar Quality Hub org via sf CLI.' + ), inputSchema: { - target_org: z.string().describe('SF org alias or username to connect as'), + target_org: z + .string() + .describe(desc('SF org alias or username to connect as', 'string, SF org alias or username')), flags: z .array(z.string()) .optional() .default([]) - .describe('Additional raw CLI flags to forward (e.g. ["--json"])'), + .describe( + desc('Additional raw CLI flags to forward (e.g. 
["--json"])', 'array of strings, optional; extra CLI flags') + ), sf_path: z .string() .optional() .describe( - 'Path to the sf CLI executable when not in PATH ' + - '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + - 'Leave unset to use auto-discovery.' + desc( + 'Path to the sf CLI executable when not in PATH ' + + '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + + 'Leave unset to use auto-discovery.', + 'string, optional; path to sf CLI executable' + ) ), }, }, @@ -94,21 +108,51 @@ export function registerQualityHubDisplay(server: McpServer): void { 'provar_qualityhub_display', { title: 'Display Quality Hub Info', - description: 'Display connected Quality Hub org info. Invokes `sf provar quality-hub display`.', + description: desc( + 'Display connected Quality Hub org info. Invokes `sf provar quality-hub display`.', + 'Display connected Quality Hub org info via sf CLI.' + ), inputSchema: { - target_org: z.string().optional().describe('SF org alias or username (uses default if omitted)'), - flags: z.array(z.string()).optional().default([]).describe('Additional raw CLI flags to forward'), + target_org: z + .string() + .optional() + .describe( + desc('SF org alias or username (uses default if omitted)', 'string, optional; SF org alias or username') + ), + flags: z + .array(z.string()) + .optional() + .default([]) + .describe(desc('Additional raw CLI flags to forward', 'array of strings, optional; extra CLI flags')), sf_path: z .string() .optional() .describe( - 'Path to the sf CLI executable when not in PATH ' + - '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + - 'Leave unset to use auto-discovery.' + desc( + 'Path to the sf CLI executable when not in PATH ' + + '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). 
' + + 'Leave unset to use auto-discovery.', + 'string, optional; path to sf CLI executable' + ) + ), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + 'Response verbosity: "summary" returns only requestId and exitCode; ' + + '"standard" (default) returns requestId, exitCode, stdout, and stderr.' + ), + fields: z + .string() + .optional() + .describe( + 'Comma-separated list of response keys to retain (e.g. "exitCode,stdout"). ' + + 'Unknown field names are silently ignored. Applied after the detail filter.' ), }, }, - ({ target_org, flags, sf_path }) => { + ({ target_org, flags, sf_path, detail, fields }) => { const requestId = makeRequestId(); log('info', 'provar_qualityhub_display', { requestId, target_org }); @@ -117,7 +161,12 @@ export function registerQualityHubDisplay(server: McpServer): void { if (target_org) args.splice(3, 0, '--target-org', target_org); const result = runSfCommand(args, sf_path); - const response = { requestId, exitCode: result.exitCode, stdout: result.stdout, stderr: result.stderr }; + let response: Record = { + requestId, + exitCode: result.exitCode, + stdout: result.stdout, + stderr: result.stderr, + }; if (result.exitCode !== 0) { return { @@ -131,6 +180,15 @@ export function registerQualityHubDisplay(server: McpServer): void { }; } + const detailLevel = (detail ?? 
'standard') as DetailLevel; + if (detailLevel !== 'standard') { + response = applyDetailLevel(response, detailLevel, QH_SUMMARY_FIELDS); + } + const fieldList = parseFieldsParam(fields); + if (fieldList) { + response = maskFields(response, fieldList) as Record; + } + return { content: [{ type: 'text' as const, text: JSON.stringify(response) }], structuredContent: response }; } catch (err) { return handleSpawnError(err, requestId, 'provar_qualityhub_display'); @@ -162,25 +220,33 @@ export function registerQualityHubTestRun(server: McpServer): void { 'provar_qualityhub_testrun', { title: 'Trigger Quality Hub Test Run', - description: + description: desc( 'Trigger a Quality Hub test run. Invokes `sf provar quality-hub test run`. ' + - 'Warning: wildcard characters (* or ?) in flag values will cause QH plan-level reporting to be skipped — use exact plan names.', + 'Warning: wildcard characters (* or ?) in flag values will cause QH plan-level reporting to be skipped — use exact plan names.', + 'Trigger a Quality Hub test run via sf CLI; use exact plan names.' + ), inputSchema: { - target_org: z.string().describe('SF org alias or username'), + target_org: z.string().describe(desc('SF org alias or username', 'string, SF org alias or username')), flags: z .array(z.string()) .optional() .default([]) .describe( - 'Additional raw CLI flags (e.g. ["--plan-name", "SmokeTests"]). Avoid wildcards in --plan-name values — they skip QH plan-level reporting.' + desc( + 'Additional raw CLI flags (e.g. ["--plan-name", "SmokeTests"]). Avoid wildcards in --plan-name values — they skip QH plan-level reporting.', + 'array of strings, optional; extra CLI flags; avoid wildcards in --plan-name' + ) ), sf_path: z .string() .optional() .describe( - 'Path to the sf CLI executable when not in PATH ' + - '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + - 'Leave unset to use auto-discovery.' 
+ desc( + 'Path to the sf CLI executable when not in PATH ' + + '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + + 'Leave unset to use auto-discovery.', + 'string, optional; path to sf CLI executable' + ) ), }, }, @@ -232,18 +298,32 @@ export function registerQualityHubTestRunReport(server: McpServer): void { 'provar_qualityhub_testrun_report', { title: 'Poll Quality Hub Test Run', - description: 'Poll the status of a Quality Hub test run. Invokes `sf provar quality-hub test run report`.', + description: desc( + 'Poll the status of a Quality Hub test run. Invokes `sf provar quality-hub test run report`.', + 'Poll a Quality Hub test run status via sf CLI.' + ), inputSchema: { - target_org: z.string().describe('SF org alias or username'), - run_id: z.string().describe('Test run ID returned by provar_qualityhub_testrun'), - flags: z.array(z.string()).optional().default([]).describe('Additional raw CLI flags'), + target_org: z.string().describe(desc('SF org alias or username', 'string, SF org alias or username')), + run_id: z + .string() + .describe( + desc('Test run ID returned by provar_qualityhub_testrun', 'string, run ID from qualityhub_testrun') + ), + flags: z + .array(z.string()) + .optional() + .default([]) + .describe(desc('Additional raw CLI flags', 'array of strings, optional; extra CLI flags')), sf_path: z .string() .optional() .describe( - 'Path to the sf CLI executable when not in PATH ' + - '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + - 'Leave unset to use auto-discovery.' + desc( + 'Path to the sf CLI executable when not in PATH ' + + '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). 
' + + 'Leave unset to use auto-discovery.', + 'string, optional; path to sf CLI executable' + ) ), }, }, @@ -304,18 +384,28 @@ export function registerQualityHubTestRunAbort(server: McpServer): void { 'provar_qualityhub_testrun_abort', { title: 'Abort Quality Hub Test Run', - description: 'Abort an in-progress Quality Hub test run. Invokes `sf provar quality-hub test run abort`.', + description: desc( + 'Abort an in-progress Quality Hub test run. Invokes `sf provar quality-hub test run abort`.', + 'Abort an in-progress Quality Hub test run via sf CLI.' + ), inputSchema: { - target_org: z.string().describe('SF org alias or username'), - run_id: z.string().describe('Test run ID to abort'), - flags: z.array(z.string()).optional().default([]).describe('Additional raw CLI flags'), + target_org: z.string().describe(desc('SF org alias or username', 'string, SF org alias or username')), + run_id: z.string().describe(desc('Test run ID to abort', 'string, run ID to abort')), + flags: z + .array(z.string()) + .optional() + .default([]) + .describe(desc('Additional raw CLI flags', 'array of strings, optional; extra CLI flags')), sf_path: z .string() .optional() .describe( - 'Path to the sf CLI executable when not in PATH ' + - '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + - 'Leave unset to use auto-discovery.' + desc( + 'Path to the sf CLI executable when not in PATH ' + + '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + + 'Leave unset to use auto-discovery.', + 'string, optional; path to sf CLI executable' + ) ), }, }, @@ -357,26 +447,51 @@ export function registerQualityHubTestcaseRetrieve(server: McpServer): void { 'provar_qualityhub_testcase_retrieve', { title: 'Retrieve Quality Hub Test Cases', - description: + description: desc( 'Retrieve Quality Hub test cases by user story or component. 
Invokes `sf provar quality-hub testcase retrieve`.', + 'Retrieve Quality Hub test cases by user story or component via sf CLI.' + ), inputSchema: { - target_org: z.string().describe('SF org alias or username'), + target_org: z.string().describe(desc('SF org alias or username', 'string, SF org alias or username')), flags: z .array(z.string()) .optional() .default([]) - .describe('Additional raw CLI flags (e.g. ["--user-story", "US-123"])'), + .describe( + desc( + 'Additional raw CLI flags (e.g. ["--user-story", "US-123"])', + 'array of strings, optional; extra CLI flags e.g. --user-story' + ) + ), sf_path: z .string() .optional() .describe( - 'Path to the sf CLI executable when not in PATH ' + - '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + - 'Leave unset to use auto-discovery.' + desc( + 'Path to the sf CLI executable when not in PATH ' + + '(e.g. "C:\\\\Program Files\\\\sf\\\\bin\\\\sf.cmd" for the Windows standalone installer). ' + + 'Leave unset to use auto-discovery.', + 'string, optional; path to sf CLI executable' + ) + ), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + 'Response verbosity: "summary" returns only requestId and exitCode; ' + + '"standard" (default) returns requestId, exitCode, stdout, and stderr.' + ), + fields: z + .string() + .optional() + .describe( + 'Comma-separated list of response keys to retain (e.g. "exitCode,stdout"). ' + + 'Unknown field names are silently ignored. Applied after the detail filter.' 
), }, }, - ({ target_org, flags, sf_path }) => { + ({ target_org, flags, sf_path, detail, fields }) => { const requestId = makeRequestId(); log('info', 'provar_qualityhub_testcase_retrieve', { requestId, target_org }); @@ -385,7 +500,12 @@ export function registerQualityHubTestcaseRetrieve(server: McpServer): void { ['provar', 'quality-hub', 'testcase', 'retrieve', '--target-org', target_org, ...flags], sf_path ); - const response = { requestId, exitCode: result.exitCode, stdout: result.stdout, stderr: result.stderr }; + let response: Record = { + requestId, + exitCode: result.exitCode, + stdout: result.stdout, + stderr: result.stderr, + }; if (result.exitCode !== 0) { return { @@ -399,6 +519,15 @@ export function registerQualityHubTestcaseRetrieve(server: McpServer): void { }; } + const detailLevel = (detail ?? 'standard') as DetailLevel; + if (detailLevel !== 'standard') { + response = applyDetailLevel(response, detailLevel, QH_SUMMARY_FIELDS); + } + const fieldList = parseFieldsParam(fields); + if (fieldList) { + response = maskFields(response, fieldList) as Record; + } + return { content: [{ type: 'text' as const, text: JSON.stringify(response) }], structuredContent: response }; } catch (err) { return handleSpawnError(err, requestId, 'provar_qualityhub_testcase_retrieve'); diff --git a/src/mcp/tools/rcaTools.ts b/src/mcp/tools/rcaTools.ts index 5a4df233..f37b6a97 100644 --- a/src/mcp/tools/rcaTools.ts +++ b/src/mcp/tools/rcaTools.ts @@ -16,6 +16,7 @@ import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { desc } from './descHelper.js'; // ── Types ───────────────────────────────────────────────────────────────────── @@ -395,24 +396,39 @@ export function registerTestRunLocate(server: McpServer): void { 'provar_testrun_report_locate', { title: 'Locate Test Report', - 
description: [ - 'Resolve exactly where Provar test run artifacts were written, without parsing them.', - 'Returns the results directory, paths to JUnit.xml and Index.html if they exist,', - 'paths to per-test HTML reports, and any validation JSON files.', - 'Supports explicit results_path override or auto-detection from sf config, provardx properties file, or ANT build.xml.', - ].join(' '), + description: desc( + [ + 'Resolve exactly where Provar test run artifacts were written, without parsing them.', + 'Returns the results directory, paths to JUnit.xml and Index.html if they exist,', + 'paths to per-test HTML reports, and any validation JSON files.', + 'Supports explicit results_path override or auto-detection from sf config, provardx properties file, or ANT build.xml.', + ].join(' '), + 'Resolve Provar test run artifact locations without parsing them.' + ), inputSchema: { - project_path: z.string().describe('Absolute path to the Provar project root'), + project_path: z + .string() + .describe(desc('Absolute path to the Provar project root', 'string, absolute path to project root')), results_path: z .string() .optional() - .describe('Explicit override for the results base directory; if provided, skip auto-detection'), + .describe( + desc( + 'Explicit override for the results base directory; if provided, skip auto-detection', + 'string, optional; explicit results base dir override' + ) + ), run_index: z .number() .int() .positive() .optional() - .describe('Which Increment run to target (default: latest); must be a positive integer'), + .describe( + desc( + 'Which Increment run to target (default: latest); must be a positive integer', + 'int >0, optional; Increment run index' + ) + ), }, }, (input) => { @@ -675,38 +691,61 @@ export function registerTestRunRca(server: McpServer, config: ServerConfig): voi 'provar_testrun_rca', { title: 'Root Cause Analysis', - description: [ - 'Parse a completed Provar test run and produce a structured Root Cause Analysis (RCA) 
report.', - 'Resolves the results directory, parses JUnit.xml, classifies each failure by category,', - 'and produces recommendations. Use locate_only=true to skip parsing and just resolve artifact locations.', - 'Use mode="failures" to get a lightweight array of failed test cases', - '([{ testItemId, title, errorMessage }]) without the full RCA classification — useful when you', - 'need failure names quickly without loading the HTML report.', - ].join(' '), + description: desc( + [ + 'Parse a completed Provar test run and produce a structured Root Cause Analysis (RCA) report.', + 'Resolves the results directory, parses JUnit.xml, classifies each failure by category,', + 'and produces recommendations. Use locate_only=true to skip parsing and just resolve artifact locations.', + 'Use mode="failures" to get a lightweight array of failed test cases', + '([{ testItemId, title, errorMessage }]) without the full RCA classification — useful when you', + 'need failure names quickly without loading the HTML report.', + ].join(' '), + 'Parse a Provar test run JUnit.xml and produce an RCA report with failure classification.' 
+ ), inputSchema: { - project_path: z.string().describe('Absolute path to the Provar project root'), + project_path: z + .string() + .describe(desc('Absolute path to the Provar project root', 'string, absolute path to project root')), results_path: z .string() .optional() - .describe('Explicit override for the results base directory; must be within --allowed-paths if provided'), + .describe( + desc( + 'Explicit override for the results base directory; must be within --allowed-paths if provided', + 'string, optional; explicit results base dir override' + ) + ), run_index: z .number() .int() .positive() .optional() - .describe('Which Increment run to target (default: latest); must be a positive integer'), + .describe( + desc( + 'Which Increment run to target (default: latest); must be a positive integer', + 'int >0, optional; Increment run index' + ) + ), locate_only: z .boolean() .optional() .default(false) - .describe('If true, skip parsing and return just artifact locations'), + .describe( + desc( + 'If true, skip parsing and return just artifact locations', + 'bool, optional; default false, skip parsing' + ) + ), mode: z .enum(['rca', 'failures']) .optional() .default('rca') .describe( - '"rca" (default): full root-cause analysis with classification and recommendations. ' + - '"failures": lightweight array of failed test cases [{ testItemId, title, errorMessage }].' + desc( + '"rca" (default): full root-cause analysis with classification and recommendations. 
' + + '"failures": lightweight array of failed test cases [{ testItemId, title, errorMessage }].', + 'enum rca|failures; default rca' + ) ), }, }, diff --git a/src/mcp/tools/testCaseGenerate.ts b/src/mcp/tools/testCaseGenerate.ts index 6310a810..6e97b736 100644 --- a/src/mcp/tools/testCaseGenerate.ts +++ b/src/mcp/tools/testCaseGenerate.ts @@ -16,6 +16,7 @@ import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; import { validateTestCase } from './testCaseValidate.js'; +import { desc } from './descHelper.js'; // ── Shorthand → fully-qualified API ID map ──────────────────────────────────── // Provar runtime requires fully-qualified IDs. Shorthand forms are accepted here @@ -117,6 +118,15 @@ const StepSchema = z.object({ }); const TOOL_DESCRIPTION = [ + // ── Construction contract (READ FIRST — PDX-482) ────────────────────────────── + // The PDX-479 regression happened when authoring guidance steered agents toward + // a per-step construction pattern via repeated step_edit calls. These three + // lines make the single-call contract authoritative at the call site so it + // outweighs any conflicting prompt/resource guidance and survives doc drift. + 'Construction pattern: pass the FULL step tree in a single call via the steps[] array.', + 'Do NOT call this tool with an empty steps[] and then append via provar_testcase_step_edit — that pattern drops scenarios, flattens nesting, and produces inconsistent step types.', + 'provar_testcase_step_edit is for AMENDING an existing validated test case (single-step add, attribute fix, debug edit), not for CONSTRUCTING one from scratch. 
If you find yourself about to call this tool with steps=[] intending to add steps in subsequent tool calls, stop and assemble the full step list first.', + // ── Existing description (unchanged below) ─────────────────────────────────── 'Generate a Provar XML test case skeleton with proper UUID v4 guids, sequential testItemId values, and structure.', 'Returns XML content. Writes to disk only when dry_run=false.', 'Generated structure: with (id is always the integer literal "1" as required by the Provar runtime), a child, then .', @@ -157,32 +167,90 @@ export function registerTestCaseGenerate(server: McpServer, config: ServerConfig server.registerTool( 'provar_testcase_generate', { - title: 'Generate Test Case', - description: TOOL_DESCRIPTION, + // PDX-484: carry the construct-vs-amend contract into the `title:` field + // because many MCP clients (Claude Desktop tool-picker chips, Cursor audit + // pane, inline tool-call references in chat threads) render only the title. + // Without the "(full steps in one call)" suffix an agent that reads only + // the title surface gets zero PDX-479 protection. Length: 43 chars — + // well under the ~50 char comfort threshold for the clients we test. + title: 'Generate Test Case (full steps in one call)', + description: desc( + TOOL_DESCRIPTION, + // PDX-482: the compact form must also carry the construction contract, + // otherwise PROVAR_MCP_SCHEMA_MODE=compact is a regression highway — + // the LLM would see a contract-free one-liner and could fall back to + // the multi-call pattern that caused PDX-479. + 'Generate a Provar test case in ONE call with the FULL steps[] tree. ' + + 'Do NOT call with steps=[] then append via provar_testcase_step_edit ' + + '(step_edit is for AMENDING existing test cases, not for CONSTRUCTING new ones).' 
+ ), inputSchema: { - test_case_name: z.string().describe('Test case name (human-readable label)'), - steps: z.array(StepSchema).default([]).describe('Ordered list of test steps'), + test_case_name: z.string().describe(desc('Test case name (human-readable label)', 'string, test case name')), + steps: z + .array(StepSchema) + .default([]) + .describe( + desc( + 'Ordered list of test steps. Pass the COMPLETE step tree for the test case in a single call — ' + + 'do not call this tool with an empty array intending to append via provar_testcase_step_edit ' + + '(that pattern is for amendments only and produces structurally invalid test cases when used to construct).', + 'array, optional; FULL ordered step tree in one call' + ) + ), target_uri: z .string() .optional() .describe( - 'Page object URI that determines the XML nesting structure. ' + - 'Omit or use "sf:ui:target" for Salesforce targets (flat structure). ' + - 'Use "ui:pageobject:target?pageId=pageobjects.PageClass" for non-SF page objects — ' + - 'steps are wrapped in a UiWithScreen element targeting that class.' + desc( + 'Page object URI that determines the XML nesting structure. ' + + 'Omit or use "sf:ui:target" for Salesforce targets (flat structure). ' + + 'Use "ui:pageobject:target?pageId=pageobjects.PageClass" for non-SF page objects — ' + + 'steps are wrapped in a UiWithScreen element targeting that class.', + 'string, optional; sf:ui:target (SF) or ui:pageobject:target?pageId=... 
(non-SF)' + ) + ), + output_path: z + .string() + .optional() + .describe( + desc( + 'Suggested file path for the .xml file (returned in response)', + 'string, optional; output .xml file path' + ) + ), + overwrite: z + .boolean() + .default(false) + .describe(desc('Overwrite if output_path file already exists', 'bool, optional; overwrite if exists')), + dry_run: z + .boolean() + .default(true) + .describe( + desc( + 'true = return XML only (default); false = write to output_path', + 'bool, optional; default true, skip write' + ) ), - output_path: z.string().optional().describe('Suggested file path for the .xml file (returned in response)'), - overwrite: z.boolean().default(false).describe('Overwrite if output_path file already exists'), - dry_run: z.boolean().default(true).describe('true = return XML only (default); false = write to output_path'), validate_after_edit: z .boolean() .default(true) .describe( - 'Run structural validation after generation (default: true). ' + - 'Returns TESTCASE_INVALID error if the generated XML fails validation. ' + - 'Set false to skip validation and omit the validation field from the response.' + desc( + 'Run structural validation after generation (default: true). ' + + 'Returns TESTCASE_INVALID error if the generated XML fails validation. 
' + + 'Set false to skip validation and omit the validation field from the response.', + 'bool, optional; default true, validate after generation' + ) + ), + idempotency_key: z + .string() + .optional() + .describe( + desc( + 'Caller-provided key echoed back for deduplication tracking', + 'string, optional; deduplication key echoed in response' + ) ), - idempotency_key: z.string().optional().describe('Caller-provided key echoed back for deduplication tracking'), }, }, (input) => { @@ -194,6 +262,33 @@ export function registerTestCaseGenerate(server: McpServer, config: ServerConfig target_uri: input.target_uri, }); + // Runtime guard for the multi-call construction pattern: rejects the exact + // shape that produces a contract-violating skeleton on disk — empty steps[] + // + non-dry-run + persistence target. Other empty-steps shapes (dry-run + // preview, no output_path) remain allowed. + if (input.steps.length === 0 && !input.dry_run && input.output_path) { + const err = makeError( + 'STEPS_REQUIRED', + 'provar_testcase_generate was called with an empty steps[] array and a target output_path. ' + + 'Constructing a test case requires the full step tree in a single call; ' + + 'an empty payload on the write path would produce a skeleton-only file.', + requestId, + false, + { + suggestion: + 'Pass the FULL step tree to provar_testcase_generate in a single call. ' + + 'provar_testcase_step_edit is for amending an already-validated test case ' + + '(single-step add, attribute fix, debug edit), not for constructing one from scratch. ' + + 'If you genuinely want a skeleton for inspection, set dry_run=true.', + } + ); + log('warn', 'provar_testcase_generate: STEPS_REQUIRED', { + requestId, + output_path: input.output_path, + }); + return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(err) }] }; + } + try { const xmlContent = buildTestCaseXml(input); const filePath: string | undefined = input.output_path ? 
path.resolve(input.output_path) : undefined; diff --git a/src/mcp/tools/testCaseStepTools.ts b/src/mcp/tools/testCaseStepTools.ts index 5a24b75f..bafb4e7d 100644 --- a/src/mcp/tools/testCaseStepTools.ts +++ b/src/mcp/tools/testCaseStepTools.ts @@ -16,6 +16,7 @@ import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; import { validateTestCase } from './testCaseValidate.js'; +import { desc } from './descHelper.js'; // ── XML parse / build config ────────────────────────────────────────────────── @@ -85,43 +86,86 @@ export function registerTestCaseStepEdit(server: McpServer, config: ServerConfig server.registerTool( 'provar_testcase_step_edit', { - title: 'Edit Test Case Step', - description: [ - 'Add or remove a single step (apiCall) in a Provar XML test case file.', - 'Uses write-to-temp-then-rename to minimise partial-write risk.', - 'Prerequisites: the test case must exist and be valid XML.', - 'For mode=remove: supply test_item_id of the step to remove.', - 'For mode=add: supply test_item_id of the anchor step, position (before|after, default after),', - 'and step_xml (the ... XML fragment for the new step; must contain exactly one ).', - 'A backup is written to .bak before any mutation and restored automatically if', - 'the post-edit validation fails.', - 'Returns STEP_NOT_FOUND (with all_test_item_ids list) when the target step is absent.', - 'Returns INVALID_STEP_XML when step_xml cannot be parsed or contains ≠1 elements.', - 'Returns INVALID_XML_AFTER_EDIT (backup restored) when the mutated file fails validation.', - 'Grounding for step_xml: call provar_qualityhub_examples_retrieve for corpus examples of the step type you need; if the response has count: 0 with a warning field, fall back: read the provar://docs/step-reference MCP resource.', - ].join(' '), + // PDX-484: carry the AMENDMENT-ONLY contract into the `title:` field. 
+ // "Amend" mirrors the AMENDMENT-ONLY framing in the description body + // and "Existing" signals that the tool does not construct new test cases. + // Length: 29 chars — well within the chip-render comfort threshold. + title: 'Amend Existing Test Case Step', + description: desc( + [ + // ── Usage contract (READ FIRST — PDX-482) ───────────────────────────── + // This tool AMENDS an existing validated test case. It is NOT for + // constructing a test case from scratch — building one step-by-step via + // repeated step_edit calls produces structurally invalid test cases + // (dropped scenarios, flat asserts, inconsistent step types — see PDX-479). + 'AMENDMENT-ONLY tool: this is for amending an existing, already-validated Provar test case (single-step add, attribute fix, debug edit).', + 'NOT for constructing a test case from scratch — for new test cases use provar_testcase_generate with the FULL steps[] tree in a single call.', + 'Building a test case step-by-step via repeated step_edit calls after a steps=[] generate produces structurally invalid output (dropped scenarios, flat asserts, inconsistent step types).', + // ── Mechanics (unchanged below) ─────────────────────────────────────── + 'Add or remove a single step (apiCall) in a Provar XML test case file.', + 'Uses write-to-temp-then-rename to minimise partial-write risk.', + 'Prerequisites: the test case must exist and be valid XML.', + 'For mode=remove: supply test_item_id of the step to remove.', + 'For mode=add: supply test_item_id of the anchor step, position (before|after, default after),', + 'and step_xml (the ... 
XML fragment for the new step; must contain exactly one ).', + 'A backup is written to .bak before any mutation and restored automatically if', + 'the post-edit validation fails.', + 'Returns STEP_NOT_FOUND (with all_test_item_ids list) when the target step is absent.', + 'Returns INVALID_STEP_XML when step_xml cannot be parsed or contains ≠1 elements.', + 'Returns INVALID_XML_AFTER_EDIT (backup restored) when the mutated file fails validation.', + 'Grounding for step_xml: call provar_qualityhub_examples_retrieve for corpus examples of the step type you need; if the response has count: 0 with a warning field, fall back: read the provar://docs/step-reference MCP resource.', + ].join(' '), + 'AMENDMENT-ONLY: add or remove a single apiCall step in an existing Provar test case (not for constructing new ones).' + ), inputSchema: { - test_case_path: z.string().describe('Absolute path to the .testcase XML file; must be within --allowed-paths'), - mode: z.enum(['remove', 'add']).describe('"remove" to delete a step; "add" to insert a new step'), + test_case_path: z + .string() + .describe( + desc( + 'Absolute path to the .testcase XML file; must be within --allowed-paths', + 'string, absolute path to .testcase file' + ) + ), + mode: z + .enum(['remove', 'add']) + .describe(desc('"remove" to delete a step; "add" to insert a new step', 'enum remove|add')), test_item_id: z .string() - .describe('For mode=remove: testItemId of the step to delete. For mode=add: testItemId of the anchor step.'), + .describe( + desc( + 'For mode=remove: testItemId of the step to delete. 
For mode=add: testItemId of the anchor step.', + 'string, testItemId of target or anchor step' + ) + ), position: z .enum(['before', 'after']) .optional() .default('after') - .describe('Where to insert relative to the anchor step (mode=add only; default: after)'), + .describe( + desc( + 'Where to insert relative to the anchor step (mode=add only; default: after)', + 'enum before|after; default after' + ) + ), step_xml: z .string() .optional() .describe( - 'The ... XML fragment for the new step (mode=add only). Must be well-formed XML.' + desc( + 'The ... XML fragment for the new step (mode=add only). Must be well-formed XML.', + 'string, optional; XML fragment for new step' + ) ), validate_after_edit: z .boolean() .optional() .default(true) - .describe('Run provar_testcase_validate after the mutation; restores backup on failure (default: true)'), + .describe( + desc( + 'Run provar_testcase_validate after the mutation; restores backup on failure (default: true)', + 'bool, optional; default true, validate after edit' + ) + ), }, }, (input) => { diff --git a/src/mcp/tools/testCaseValidate.ts b/src/mcp/tools/testCaseValidate.ts index 7ef8d3d3..b374db21 100644 --- a/src/mcp/tools/testCaseValidate.ts +++ b/src/mcp/tools/testCaseValidate.ts @@ -8,6 +8,7 @@ /* eslint-disable camelcase */ import fs from 'node:fs'; import path from 'node:path'; +import { createHash } from 'node:crypto'; import { z } from 'zod'; import { XMLParser } from 'fast-xml-parser'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; @@ -23,7 +24,20 @@ import { QualityHubRateLimitError, REQUEST_ACCESS_URL, } from '../../services/qualityHub/client.js'; +import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; +import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js'; +import { + generateRunId, + saveRun, + hasAnyRun, + loadBaselineViolations, + computeDiff, + computeContextHash, + resolveValidationDir, + type DiffableViolation, +} from 
'../utils/validationDiff.js'; import { runBestPractices } from './bestPracticesEngine.js'; +import { desc } from './descHelper.js'; const ONBOARDING_MESSAGE = 'Quality Hub validation unavailable — running local validation only (structural rules, no quality scoring).\n' + @@ -41,20 +55,109 @@ const UNREACHABLE_WARNING = 'Quality Hub API unreachable. Running local validation only (structural rules, no quality scoring).\n' + 'For CI/CD: set PROVAR_QUALITY_HUB_URL and PROVAR_API_KEY environment variables.'; +const TC_VALIDATE_SUMMARY_FIELDS = [ + 'requestId', + 'is_valid', + 'validity_score', + 'quality_score', + 'validation_source', + 'run_id', + 'completeness_score', + 'recommended_next_action', +]; + +/** Storage dir for testcase diff runs (namespaced to avoid cross-tool baseline collisions). */ +function tcStorageDir(): string { + return resolveValidationDir('testcase'); +} + +/** Resolve validation result from QualityHub API or fall back to local. */ +async function resolveBaseResult( + source: string, + apiKey: string | null, + requestId: string +): Promise { + if (!apiKey) { + return { ...validateTestCase(source), validation_source: 'local', validation_warning: ONBOARDING_MESSAGE }; + } + const baseUrl = getQualityHubBaseUrl(); + try { + const apiResult = await qualityHubClient.validateTestCaseViaApi(source, apiKey, baseUrl); + const localMeta = validateTestCase(source); + log('info', 'provar_testcase_validate: quality_hub', { requestId }); + return { + ...apiResult, + issues: apiResult.issues as unknown as ValidationIssue[], + step_count: localMeta.step_count, + error_count: apiResult.issues.filter((i) => i.severity === 'ERROR').length, + warning_count: apiResult.issues.filter((i) => i.severity === 'WARNING').length, + test_case_id: localMeta.test_case_id, + test_case_name: localMeta.test_case_name, + validation_source: 'quality_hub', + }; + } catch (apiErr: unknown) { + let warning: string; + if (apiErr instanceof QualityHubAuthError) { + warning = 
AUTH_WARNING; + log('warn', 'provar_testcase_validate: auth error, falling back', { requestId }); + } else if (apiErr instanceof QualityHubRateLimitError) { + warning = RATE_LIMIT_WARNING; + log('warn', 'provar_testcase_validate: rate limited, falling back', { requestId }); + } else { + warning = UNREACHABLE_WARNING; + log('warn', 'provar_testcase_validate: api unreachable, falling back', { requestId }); + } + return { ...validateTestCase(source), validation_source: 'local_fallback', validation_warning: warning }; + } +} + +/** Derive a stable context key for run ID generation. */ +function tcRunContext(filePath: string | undefined, xmlContent: string): string { + if (filePath) return filePath; + return createHash('sha1').update(xmlContent.slice(0, 200)).digest('hex').slice(0, 16); +} + export function registerTestCaseValidate(server: McpServer, config: ServerConfig): void { server.registerTool( 'provar_testcase_validate', { title: 'Validate Test Case', - description: - 'Validate a Provar XML test case for structural correctness and quality. Checks XML declaration, root element, required attributes (guid UUID v4, testItemId integer), presence, and applies best-practice rules. When a Provar API key is configured (via sf provar auth login or PROVAR_API_KEY env var), calls the Quality Hub API for full 170-rule scoring. Falls back to local validation if no key is set or the API is unavailable. Returns validity_score (schema compliance), quality_score (best practices, 0–100), and validation_source indicating which ruleset was applied. When structural errors are returned, consult the provar://docs/step-reference MCP resource for correct step attribute schemas.', + description: desc( + 'Validate a Provar XML test case for structural correctness and quality. Checks XML declaration, root element, required attributes (guid UUID v4, testItemId integer), presence, and applies best-practice rules. 
When a Provar API key is configured (via sf provar auth login or PROVAR_API_KEY env var), calls the Quality Hub API for full 170-rule scoring. Falls back to local validation if no key is set or the API is unavailable. Returns validity_score (schema compliance), quality_score (best practices, 0–100), and validation_source indicating which ruleset was applied. Every response includes run_id — pass it as baseline_run_id in the next call to receive only new/resolved issues. When structural errors are returned, consult the provar://docs/step-reference MCP resource for correct step attribute schemas.', + 'Validate a Provar XML test case: structure, UUIDs, steps, quality scoring; run_id for baseline diff.' + ), inputSchema: { - content: z.string().optional().describe('XML content to validate directly (alias: xml)'), - xml: z.string().optional().describe('XML content to validate — API-compatible alias for content'), - file_path: z.string().optional().describe('Path to .xml test case file'), + content: z + .string() + .optional() + .describe(desc('XML content to validate directly (alias: xml)', 'string, inline content')), + xml: z + .string() + .optional() + .describe(desc('XML content to validate — API-compatible alias for content', 'string, inline content')), + file_path: z.string().optional().describe(desc('Path to .xml test case file', 'string, path to file')), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + desc( + 'Response verbosity. "summary": is_valid, scores, and stop signal only. "standard"/"full": full issues list (default).', + 'enum summary|standard|full, optional; default standard' + ) + ), + baseline_run_id: z + .string() + .optional() + .describe( + desc( + 'run_id from a previous call. When provided, returns only issues that are new or resolved since that run: { added, resolved, unchanged_count, run_id }. 
If not found, returns error BASELINE_NOT_FOUND.', + 'string, optional; prev run_id for diff response' + ) + ), }, }, - async ({ content, xml, file_path }) => { + async ({ content, xml, file_path, detail, baseline_run_id }) => { const requestId = makeRequestId(); log('info', 'provar_testcase_validate', { requestId, has_content: !!(content ?? xml), file_path }); @@ -78,63 +181,80 @@ export function registerTestCaseValidate(server: McpServer, config: ServerConfig } const apiKey = resolveApiKey(); + const baseResult = await resolveBaseResult(source, apiKey, requestId); + + const storageDir = tcStorageDir(); + const context = tcRunContext(file_path, source); + const contextHash = computeContextHash('tc', context); + const runId = generateRunId(context); + const bpViolations = (baseResult.best_practices_violations ?? []) as unknown as DiffableViolation[]; + const currentViolations: DiffableViolation[] = [ + ...(baseResult.issues as unknown as DiffableViolation[]), + ...bpViolations, + ]; + + // Load baseline BEFORE saving to prevent eviction of the requested baseline + const baseline = + baseline_run_id !== undefined && baseline_run_id !== '' + ? loadBaselineViolations(storageDir, baseline_run_id, contextHash) + : null; + + const hasBaseline = hasAnyRun(storageDir); + + try { + saveRun(storageDir, runId, currentViolations, contextHash); + } catch (saveErr) { + log('warn', 'provar_testcase_validate: could not save run for diff', { + requestId, + error: (saveErr as Error).message, + }); + } - if (apiKey) { - const baseUrl = getQualityHubBaseUrl(); - try { - const apiResult = await qualityHubClient.validateTestCaseViaApi(source, apiKey, baseUrl); - const localMeta = validateTestCase(source); - const result = { + // Diff mode + if (baseline_run_id !== undefined && baseline_run_id !== '') { + if (!baseline) { + const errResult = makeError( + 'BASELINE_NOT_FOUND', + 'Baseline run not found. 
Run validation without baseline_run_id first to establish a baseline.', requestId, - ...apiResult, - step_count: localMeta.step_count, - error_count: apiResult.issues.filter((i) => i.severity === 'ERROR').length, - warning_count: apiResult.issues.filter((i) => i.severity === 'WARNING').length, - test_case_id: localMeta.test_case_id, - test_case_name: localMeta.test_case_name, - validation_source: 'quality_hub' as const, - }; - log('info', 'provar_testcase_validate: quality_hub', { requestId }); - return { - content: [{ type: 'text' as const, text: JSON.stringify(result) }], - structuredContent: result, - }; - } catch (apiErr: unknown) { - // API failed — determine the warning and fall through to local validation - let warning: string; - if (apiErr instanceof QualityHubAuthError) { - warning = AUTH_WARNING; - log('warn', 'provar_testcase_validate: auth error, falling back', { requestId }); - } else if (apiErr instanceof QualityHubRateLimitError) { - warning = RATE_LIMIT_WARNING; - log('warn', 'provar_testcase_validate: rate limited, falling back', { requestId }); - } else { - warning = UNREACHABLE_WARNING; - log('warn', 'provar_testcase_validate: api unreachable, falling back', { requestId }); - } - const localResult = { - requestId, - ...validateTestCase(source), - validation_source: 'local_fallback' as const, - validation_warning: warning, - }; - return { - content: [{ type: 'text' as const, text: JSON.stringify(localResult) }], - structuredContent: localResult, - }; + false, + { suggestion: 'Run provar_testcase_validate without baseline_run_id first to establish a baseline.' } + ); + return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(errResult) }] }; } + const diff = computeDiff(baseline, currentViolations); + const completeness_score = calcCompletenessScore(baseResult.is_valid ? 
1 : 0, 1); + const recommended_next_action = calcNextAction(completeness_score, true, currentViolations.length); + const diffResponse = { + requestId, + run_id: runId, + ...diff, + completeness_score, + recommended_next_action, + }; + return { + content: [{ type: 'text' as const, text: JSON.stringify(diffResponse) }], + structuredContent: diffResponse, + }; } - // No API key configured — run local validation with onboarding message + const completeness_score = calcCompletenessScore(baseResult.is_valid ? 1 : 0, 1); + const recommended_next_action = calcNextAction(completeness_score, hasBaseline, currentViolations.length); + const result = { requestId, - ...validateTestCase(source), - validation_source: 'local' as const, - validation_warning: ONBOARDING_MESSAGE, + run_id: runId, + completeness_score, + recommended_next_action, + ...baseResult, }; + + const detailLevel = (detail ?? 'standard') as DetailLevel; + const finalResult = applyDetailLevel(result, detailLevel, TC_VALIDATE_SUMMARY_FIELDS); + return { - content: [{ type: 'text' as const, text: JSON.stringify(result) }], - structuredContent: result, + content: [{ type: 'text' as const, text: JSON.stringify(finalResult) }], + structuredContent: finalResult, }; } catch (err: unknown) { const error = err as Error & { code?: string }; diff --git a/src/mcp/tools/testPlanTools.ts b/src/mcp/tools/testPlanTools.ts index 1dd8352e..f97928d3 100644 --- a/src/mcp/tools/testPlanTools.ts +++ b/src/mcp/tools/testPlanTools.ts @@ -15,6 +15,7 @@ import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { desc } from './descHelper.js'; // ── Helpers ─────────────────────────────────────────────────────────────────── @@ -64,26 +65,48 @@ export function registerTestPlanCreate(server: McpServer, config: ServerConfig): 'provar_testplan_create', { 
title: 'Create Test Plan', - description: [ - 'Create a new Provar test plan: makes the plans/{plan_name}/ directory and writes the root .planitem file.', - 'Use this before provar_testplan_create-suite or provar_testplan_add-instance, which both require the plan to already exist.', - 'Returns the guid assigned to the new plan, the plan directory path, and the .planitem path written.', - ].join(' '), + description: desc( + [ + 'Create a new Provar test plan: makes the plans/{plan_name}/ directory and writes the root .planitem file.', + 'Use this before provar_testplan_create-suite or provar_testplan_add-instance, which both require the plan to already exist.', + 'Returns the guid assigned to the new plan, the plan directory path, and the .planitem path written.', + ].join(' '), + 'Create a new Provar test plan directory with a root .planitem file.' + ), inputSchema: { project_path: z .string() - .describe('Absolute path to the Provar project root (must contain a .testproject file)'), - plan_name: z.string().describe('Name of the new test plan (becomes the directory name under plans/)'), + .describe( + desc( + 'Absolute path to the Provar project root (must contain a .testproject file)', + 'string, absolute path to project root' + ) + ), + plan_name: z + .string() + .describe( + desc('Name of the new test plan (becomes the directory name under plans/)', 'string, test plan name') + ), overwrite: z .boolean() .optional() .default(false) - .describe('Overwrite the .planitem file if the plan directory already exists (default: false)'), + .describe( + desc( + 'Overwrite the .planitem file if the plan directory already exists (default: false)', + 'bool, optional; overwrite .planitem if exists' + ) + ), dry_run: z .boolean() .optional() .default(false) - .describe('Return what would be created without writing to disk (default: false)'), + .describe( + desc( + 'Return what would be created without writing to disk (default: false)', + 'bool, optional; default false, skip 
write' + ) + ), }, }, ({ project_path, plan_name, overwrite, dry_run }) => { @@ -203,33 +226,60 @@ export function registerTestPlanAddInstance(server: McpServer, config: ServerCon 'provar_testplan_add-instance', { title: 'Add Test Plan Instance', - description: [ - 'Add a .testinstance file to an existing Provar test plan suite directory.', - 'The plan directory and suite directory must already exist.', - 'test_case_path is relative to the project root (e.g. "tests/MyTest.testcase").', - 'suite_path is the path within the plan (e.g. "MySuite" or "MySuite/SubSuite").', - 'Returns the guid assigned to the new instance and the path where it was written.', - ].join(' '), + description: desc( + [ + 'Add a .testinstance file to an existing Provar test plan suite directory.', + 'The plan directory and suite directory must already exist.', + 'test_case_path is relative to the project root (e.g. "tests/MyTest.testcase").', + 'suite_path is the path within the plan (e.g. "MySuite" or "MySuite/SubSuite").', + 'Returns the guid assigned to the new instance and the path where it was written.', + ].join(' '), + 'Add a .testinstance file to an existing test plan suite directory.' + ), inputSchema: { - project_path: z.string().describe('Absolute path to the Provar project root'), + project_path: z + .string() + .describe(desc('Absolute path to the Provar project root', 'string, absolute path to project root')), test_case_path: z .string() - .describe('Path to the .testcase file, relative to project root (e.g. "tests/MyTest.testcase")'), - plan_name: z.string().describe('Name of the test plan (directory under plans/)'), + .describe( + desc( + 'Path to the .testcase file, relative to project root (e.g. 
"tests/MyTest.testcase")', + 'string, relative path to .testcase file' + ) + ), + plan_name: z + .string() + .describe(desc('Name of the test plan (directory under plans/)', 'string, test plan name')), suite_path: z .string() .optional() - .describe('Path within the plan to place the instance (e.g. "MySuite" or "MySuite/SubSuite")'), + .describe( + desc( + 'Path within the plan to place the instance (e.g. "MySuite" or "MySuite/SubSuite")', + 'string, optional; suite path within plan' + ) + ), overwrite: z .boolean() .optional() .default(false) - .describe('Overwrite the .testinstance file if it already exists (default: false)'), + .describe( + desc( + 'Overwrite the .testinstance file if it already exists (default: false)', + 'bool, optional; overwrite if exists' + ) + ), dry_run: z .boolean() .optional() .default(false) - .describe('Return what would be written without writing to disk (default: false)'), + .describe( + desc( + 'Return what would be written without writing to disk (default: false)', + 'bool, optional; default false, skip write' + ) + ), }, }, ({ project_path, test_case_path, plan_name, suite_path, overwrite, dry_run }) => { @@ -423,25 +473,44 @@ export function registerTestPlanCreateSuite(server: McpServer, config: ServerCon 'provar_testplan_create-suite', { title: 'Create Test Plan Suite', - description: [ - 'Create a new suite directory inside a Provar test plan.', - 'The plan directory must already exist with a .planitem file at its root.', - 'Writes a new .planitem file into the created suite directory.', - 'Returns the guid assigned to the new suite.', - ].join(' '), + description: desc( + [ + 'Create a new suite directory inside a Provar test plan.', + 'The plan directory must already exist with a .planitem file at its root.', + 'Writes a new .planitem file into the created suite directory.', + 'Returns the guid assigned to the new suite.', + ].join(' '), + 'Create a new suite directory with a .planitem inside a test plan.' 
+ ), inputSchema: { - project_path: z.string().describe('Absolute path to the Provar project root'), - plan_name: z.string().describe('Name of the test plan (directory under plans/)'), - suite_name: z.string().describe('Name of the new suite directory to create'), + project_path: z + .string() + .describe(desc('Absolute path to the Provar project root', 'string, absolute path to project root')), + plan_name: z + .string() + .describe(desc('Name of the test plan (directory under plans/)', 'string, test plan name')), + suite_name: z + .string() + .describe(desc('Name of the new suite directory to create', 'string, new suite directory name')), parent_suite_path: z .string() .optional() - .describe('Path of the parent suite within the plan (e.g. "MySuite"). Omit to create at plan root.'), + .describe( + desc( + 'Path of the parent suite within the plan (e.g. "MySuite"). Omit to create at plan root.', + 'string, optional; parent suite path within plan' + ) + ), dry_run: z .boolean() .optional() .default(false) - .describe('Return what would be created without writing to disk (default: false)'), + .describe( + desc( + 'Return what would be created without writing to disk (default: false)', + 'bool, optional; default false, skip write' + ) + ), }, }, ({ project_path, plan_name, suite_name, parent_suite_path, dry_run }) => { @@ -571,19 +640,36 @@ export function registerTestPlanRemoveInstance(server: McpServer, config: Server 'provar_testplan_remove-instance', { title: 'Remove Test Plan Instance', - description: [ - 'Remove a .testinstance file from a Provar test plan.', - 'instance_path is relative to the project root.', - 'Returns the path of the removed file.', - ].join(' '), + description: desc( + [ + 'Remove a .testinstance file from a Provar test plan.', + 'instance_path is relative to the project root.', + 'Returns the path of the removed file.', + ].join(' '), + 'Remove a .testinstance file from a Provar test plan.' 
+ ), inputSchema: { - project_path: z.string().describe('Absolute path to the Provar project root'), - instance_path: z.string().describe('Path to the .testinstance file, relative to project root'), + project_path: z + .string() + .describe(desc('Absolute path to the Provar project root', 'string, absolute path to project root')), + instance_path: z + .string() + .describe( + desc( + 'Path to the .testinstance file, relative to project root', + 'string, relative path to .testinstance file' + ) + ), dry_run: z .boolean() .optional() .default(false) - .describe('Return what would be removed without deleting (default: false)'), + .describe( + desc( + 'Return what would be removed without deleting (default: false)', + 'bool, optional; default false, skip delete' + ) + ), }, }, ({ project_path, instance_path, dry_run }) => { diff --git a/src/mcp/tools/testPlanValidate.ts b/src/mcp/tools/testPlanValidate.ts index a9f30462..d1e91fc9 100644 --- a/src/mcp/tools/testPlanValidate.ts +++ b/src/mcp/tools/testPlanValidate.ts @@ -10,7 +10,38 @@ import { z } from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; -import { validatePlan, buildHierarchySummary, type TestPlanInput } from './hierarchyValidate.js'; +import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; +import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js'; +import { + validatePlan, + buildHierarchySummary, + type TestPlanInput, + type PlanResult, + type SuiteResult, +} from './hierarchyValidate.js'; +import { desc } from './descHelper.js'; + +function countSuiteViolations(suite: SuiteResult): number { + let total = suite.violations.length; + for (const tc of suite.test_cases) { + total += tc.issues.length + tc.best_practices_violations.length; + } + for (const child of suite.test_suites) { + total += countSuiteViolations(child); + } 
+ return total; +} + +function countAllPlanViolations(result: PlanResult): number { + let total = result.violations.length; + for (const suite of result.test_suites) { + total += countSuiteViolations(suite); + } + for (const tc of result.test_cases) { + total += tc.issues.length + tc.best_practices_violations.length; + } + return total; +} // ── Zod schemas ─────────────────────────────────────────────────────────────── @@ -70,33 +101,62 @@ const metadataSchema = z 'Plan completeness metadata — these fields are configured in the Provar Quality Hub app, not in local project files' ); +const PLAN_VALIDATE_SUMMARY_FIELDS = [ + 'requestId', + 'name', + 'quality_score', + 'summary', + 'completeness_score', + 'recommended_next_action', +]; + export function registerTestPlanValidate(server: McpServer): void { server.registerTool( 'provar_testplan_validate', { title: 'Validate Test Plan', - description: - 'Validate a Provar test plan: checks for empty plans, duplicate suite names, oversized plans (>20 suites), plan completeness (objectives, scope, methodology, environments, acceptance criteria, test data strategy, risk assessment), and naming consistency. Recursively validates child suites and test cases. Returns quality score, plan-level violations, and full hierarchy results.', + description: desc( + 'Validate a Provar test plan: checks for empty plans, duplicate suite names, oversized plans (>20 suites), plan completeness (objectives, scope, methodology, environments, acceptance criteria, test data strategy, risk assessment), and naming consistency. Recursively validates child suites and test cases. Returns quality score, plan-level violations, and full hierarchy results. Use completeness_score and recommended_next_action to determine whether to continue iterating.', + 'Validate a Provar test plan: naming, size, completeness, per-suite quality; stop signal via completeness_score.' 
+ ), inputSchema: { - plan_name: z.string().describe('Name of the test plan'), - test_suites: z.array(suiteSchema).optional().describe('Test suites belonging to this plan'), - test_cases: z.array(testCaseSchema).optional().describe('Test cases directly in this plan (not in a suite)'), + plan_name: z.string().describe(desc('Name of the test plan', 'string')), + test_suites: z + .array(suiteSchema) + .optional() + .describe(desc('Test suites belonging to this plan', 'object[], optional')), + test_cases: z + .array(testCaseSchema) + .optional() + .describe(desc('Test cases directly in this plan (not in a suite)', 'object[], optional')), test_suite_count: z .number() .int() .min(0) .optional() - .describe('Explicit suite count for size check (overrides counting test_suites)'), + .describe(desc('Explicit suite count for size check (overrides counting test_suites)', 'int ≥0, optional')), metadata: metadataSchema, quality_threshold: z .number() .min(0) .max(100) .optional() - .describe('Minimum quality score for a test case to be considered valid (default: 80)'), + .describe( + desc('Minimum quality score for a test case to be considered valid (default: 80)', 'number 0–100, optional') + ), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + desc( + 'Response verbosity. "summary": name, scores, and stop signal only. 
"standard"/"full": full violations and hierarchy results (default).', + 'enum summary|standard|full, optional; default standard' + ) + ), }, }, - ({ plan_name, test_suites, test_cases, test_suite_count, metadata, quality_threshold }) => { + ({ plan_name, test_suites, test_cases, test_suite_count, metadata, quality_threshold, detail }) => { const requestId = makeRequestId(); log('info', 'provar_testplan_validate', { requestId, plan_name }); @@ -112,11 +172,25 @@ export function registerTestPlanValidate(server: McpServer): void { const result = validatePlan(input, threshold); const summary = buildHierarchySummary(result); - const response = { requestId, ...result, summary }; + + const completeness_score = calcCompletenessScore(summary.test_cases_valid, summary.total_test_cases); + const remainingViolations = countAllPlanViolations(result); + const recommended_next_action = calcNextAction(completeness_score, false, remainingViolations); + + const response = { + requestId, + completeness_score, + recommended_next_action, + ...result, + summary, + }; + + const detailLevel = (detail ?? 
'standard') as DetailLevel; + const finalResponse = applyDetailLevel(response, detailLevel, PLAN_VALIDATE_SUMMARY_FIELDS); return { - content: [{ type: 'text' as const, text: JSON.stringify(response) }], - structuredContent: response, + content: [{ type: 'text' as const, text: JSON.stringify(finalResponse) }], + structuredContent: finalResponse, }; } catch (err: unknown) { const error = err as Error; diff --git a/src/mcp/tools/testSuiteValidate.ts b/src/mcp/tools/testSuiteValidate.ts index bb23dcc8..07e96c47 100644 --- a/src/mcp/tools/testSuiteValidate.ts +++ b/src/mcp/tools/testSuiteValidate.ts @@ -10,7 +10,32 @@ import { z } from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; -import { validateSuite, buildHierarchySummary, type TestSuiteInput } from './hierarchyValidate.js'; +import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; +import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js'; +import { + generateRunId, + saveRun, + hasAnyRun, + loadBaselineViolations, + computeDiff, + computeContextHash, + resolveValidationDir, + type DiffableViolation, +} from '../utils/validationDiff.js'; +import { validateSuite, buildHierarchySummary, type TestSuiteInput, type SuiteResult } from './hierarchyValidate.js'; +import { desc } from './descHelper.js'; + +function collectAllViolations(result: SuiteResult): DiffableViolation[] { + const all: DiffableViolation[] = [...(result.violations as unknown as DiffableViolation[])]; + for (const tc of result.test_cases) { + all.push(...(tc.issues as unknown as DiffableViolation[])); + all.push(...(tc.best_practices_violations as unknown as DiffableViolation[])); + } + for (const child of result.test_suites) { + all.push(...collectAllViolations(child)); + } + return all; +} // ── Zod schemas 
─────────────────────────────────────────────────────────────── @@ -42,35 +67,77 @@ const childSuiteSchema = z.object({ test_case_count: z.number().int().min(0).optional().describe('Explicit test case count for size check'), }); +const SUITE_VALIDATE_SUMMARY_FIELDS = [ + 'requestId', + 'name', + 'quality_score', + 'summary', + 'run_id', + 'completeness_score', + 'recommended_next_action', +]; + +function suiteStorageDir(): string { + return resolveValidationDir('testsuite'); +} + export function registerTestSuiteValidate(server: McpServer): void { server.registerTool( 'provar_testsuite_validate', { title: 'Validate Test Suite', - description: - 'Validate a Provar test suite: checks for empty suites, duplicate names, oversized suites (>75 tests), and naming convention consistency. Recursively validates child suites and individual test case XML. Returns quality score, suite-level violations, and per-test-case results.', + description: desc( + 'Validate a Provar test suite: checks for empty suites, duplicate names, oversized suites (>75 tests), and naming convention consistency. Recursively validates child suites and individual test case XML. Returns quality score, suite-level violations, and per-test-case results. Every response includes run_id — pass it as baseline_run_id in the next call to receive only new/resolved violations.', + 'Validate a Provar test suite: naming, size, duplicates, per-test-case quality; run_id for diff.' 
+ ), inputSchema: { - suite_name: z.string().describe('Name of the test suite'), - test_cases: z.array(testCaseSchema).optional().describe('Test cases directly in this suite'), + suite_name: z.string().describe(desc('Name of the test suite', 'string')), + test_cases: z + .array(testCaseSchema) + .optional() + .describe(desc('Test cases directly in this suite', 'object[], optional')), child_suites: z .array(childSuiteSchema) .optional() - .describe('Child test suites (supports up to 2 levels of nesting)'), + .describe(desc('Child test suites (supports up to 2 levels of nesting)', 'object[], optional')), test_case_count: z .number() .int() .min(0) .optional() - .describe('Explicit total test case count for size check (overrides counting test_cases)'), + .describe( + desc('Explicit total test case count for size check (overrides counting test_cases)', 'int ≥0, optional') + ), quality_threshold: z .number() .min(0) .max(100) .optional() - .describe('Minimum quality score for a test case to be considered valid (default: 80)'), + .describe( + desc('Minimum quality score for a test case to be considered valid (default: 80)', 'number 0–100, optional') + ), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + desc( + 'Response verbosity. "summary": name, scores, and stop signal only. "standard"/"full": full violations and per-test-case results (default).', + 'enum summary|standard|full, optional; default standard' + ) + ), + baseline_run_id: z + .string() + .optional() + .describe( + desc( + 'run_id from a previous call. When provided, returns only violations that are new or resolved since that run: { added, resolved, unchanged_count, run_id }. 
If not found, returns error BASELINE_NOT_FOUND.', + 'string, optional; prev run_id for diff response' + ) + ), }, }, - ({ suite_name, test_cases, child_suites, test_case_count, quality_threshold }) => { + ({ suite_name, test_cases, child_suites, test_case_count, quality_threshold, detail, baseline_run_id }) => { const requestId = makeRequestId(); log('info', 'provar_testsuite_validate', { requestId, suite_name }); @@ -85,11 +152,75 @@ export function registerTestSuiteValidate(server: McpServer): void { const result = validateSuite(input, threshold); const summary = buildHierarchySummary(result); - const response = { requestId, ...result, summary }; + + const storageDir = suiteStorageDir(); + const contextHash = computeContextHash('suite', suite_name); + const runId = generateRunId(suite_name); + const currentViolations = collectAllViolations(result); + + // Load baseline BEFORE saving to prevent eviction of the requested baseline + const baseline = + baseline_run_id !== undefined && baseline_run_id !== '' + ? loadBaselineViolations(storageDir, baseline_run_id, contextHash) + : null; + + const hasBaseline = hasAnyRun(storageDir); + + try { + saveRun(storageDir, runId, currentViolations, contextHash); + } catch (saveErr) { + log('warn', 'provar_testsuite_validate: could not save run for diff', { + requestId, + error: (saveErr as Error).message, + }); + } + + // Diff mode + if (baseline_run_id !== undefined && baseline_run_id !== '') { + if (!baseline) { + const errResult = makeError( + 'BASELINE_NOT_FOUND', + 'Baseline run not found. Run validation without baseline_run_id first to establish a baseline.', + requestId, + false, + { suggestion: 'Run provar_testsuite_validate without baseline_run_id first to establish a baseline.' 
} + ); + return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(errResult) }] }; + } + const diff = computeDiff(baseline, currentViolations); + const completeness_score = calcCompletenessScore(summary.test_cases_valid, summary.total_test_cases); + const recommended_next_action = calcNextAction(completeness_score, true, currentViolations.length); + const diffResponse = { + requestId, + run_id: runId, + ...diff, + completeness_score, + recommended_next_action, + }; + return { + content: [{ type: 'text' as const, text: JSON.stringify(diffResponse) }], + structuredContent: diffResponse, + }; + } + + const completeness_score = calcCompletenessScore(summary.test_cases_valid, summary.total_test_cases); + const recommended_next_action = calcNextAction(completeness_score, hasBaseline, currentViolations.length); + + const response = { + requestId, + run_id: runId, + completeness_score, + recommended_next_action, + ...result, + summary, + }; + + const detailLevel = (detail ?? 'standard') as DetailLevel; + const finalResponse = applyDetailLevel(response, detailLevel, SUITE_VALIDATE_SUMMARY_FIELDS); return { - content: [{ type: 'text' as const, text: JSON.stringify(response) }], - structuredContent: response, + content: [{ type: 'text' as const, text: JSON.stringify(finalResponse) }], + structuredContent: finalResponse, }; } catch (err: unknown) { const error = err as Error; diff --git a/src/mcp/utils/detailLevel.ts b/src/mcp/utils/detailLevel.ts new file mode 100644 index 00000000..752760f1 --- /dev/null +++ b/src/mcp/utils/detailLevel.ts @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +export type DetailLevel = 'summary' | 'standard' | 'full'; + +/** + * Shape a structured response object according to the requested detail level. 
+ * + * - `summary` — retain only the keys listed in summaryFields + * - `standard` — return data unchanged (the existing default response shape) + * - `full` — return data unchanged (callers expand gated fields before calling) + */ +export function applyDetailLevel( + data: Record, + level: DetailLevel, + summaryFields: string[] +): Record { + if (level === 'summary') { + return Object.fromEntries(Object.entries(data).filter(([k]) => summaryFields.includes(k))); + } + return data; +} diff --git a/src/mcp/utils/fieldMask.ts b/src/mcp/utils/fieldMask.ts new file mode 100644 index 00000000..9ea0e964 --- /dev/null +++ b/src/mcp/utils/fieldMask.ts @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +/** + * Mask an object (or array of objects) to retain only the specified fields. + * + * - Top-level keys: `"name"` keeps only the `name` property + * - Dot notation: `"steps.action"` keeps the `steps` array but only `action` within each element + * - Unknown field names are silently ignored — never an error + * - Arrays: masking is applied to every element + * + * @param obj Source object or array (typed as unknown; cast internally, never through any) + * @param fields Parsed field list — each entry is a dot-path string + */ +export function maskFields(obj: unknown, fields: string[]): unknown { + if (Array.isArray(obj)) { + return obj.map((item) => maskFields(item, fields)); + } + + if (obj === null || typeof obj !== 'object') { + return obj; + } + + const source = obj as Record; + + // Group fields: topLevelKeys contains every key to retain. + // dotFields[key] holds the sub-paths to drill into for that key. 
+ const topLevelKeys = new Set(); + const dotFields: Record = {}; + + for (const field of fields) { + const dotIdx = field.indexOf('.'); + if (dotIdx === -1) { + topLevelKeys.add(field); + } else { + const top = field.slice(0, dotIdx); + const rest = field.slice(dotIdx + 1); + topLevelKeys.add(top); + if (!dotFields[top]) dotFields[top] = []; + dotFields[top].push(rest); + } + } + + const result: Record = {}; + for (const key of topLevelKeys) { + if (!(key in source)) continue; // silently ignore unknown fields + const subPaths = dotFields[key]; + if (subPaths) { + const val = source[key]; + // Dot-path into a primitive can't be narrowed; omit rather than leak the whole value. + if (Array.isArray(val) || (val !== null && typeof val === 'object')) { + result[key] = maskFields(val, subPaths); + } + } else { + result[key] = source[key]; + } + } + + return result; +} + +/** + * Parse a comma-separated fields string into a trimmed, non-empty field list. + * Returns null when the string is absent or blank (caller should skip masking). + */ +export function parseFieldsParam(fields: string | undefined): string[] | null { + if (!fields) return null; + const parsed = fields + .split(',') + .map((f) => f.trim()) + .filter(Boolean); + return parsed.length > 0 ? parsed : null; +} diff --git a/src/mcp/utils/tokenMeta.ts b/src/mcp/utils/tokenMeta.ts new file mode 100644 index 00000000..23ab6f48 --- /dev/null +++ b/src/mcp/utils/tokenMeta.ts @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +// --------------------------------------------------------------------------- // +// Minimal structural types — avoids importing SDK internal paths. 
+// --------------------------------------------------------------------------- // + +type ContentItem = { type: 'text'; text: string }; + +export interface ToolResult { + content: ContentItem[]; + structuredContent?: Record; + isError?: boolean; +} + +interface ToolExtra { + sessionId?: string; +} + +export type AnyToolCallback = (args: Record, extra: ToolExtra) => ToolResult | Promise; + +// --------------------------------------------------------------------------- // +// PDX-474 — Depth Guard (PROVAR_MCP_MAX_TOOL_DEPTH) +// --------------------------------------------------------------------------- // + +interface SessionEntry { + calls: number; + totalEstimatedTokens: number; +} + +export type DepthGuardState = Map; + +const MAX_SESSIONS = 1000; + +export function createDepthGuardState(): DepthGuardState { + return new Map(); +} + +function getOrCreateEntry(state: DepthGuardState, sessionId: string): SessionEntry { + if (!state.has(sessionId)) { + if (state.size >= MAX_SESSIONS) { + const oldest: string | undefined = state.keys().next().value as string | undefined; + if (oldest !== undefined) state.delete(oldest); + } + state.set(sessionId, { calls: 0, totalEstimatedTokens: 0 }); + } + // Non-null guaranteed by the set above or pre-existing entry. + return state.get(sessionId) as SessionEntry; +} + +/** + * Wraps a tool handler to enforce a per-session call budget. + * Once `limit` calls have been made for a session, every further call returns + * TOOL_BUDGET_EXCEEDED without invoking the underlying handler. + * Callers without a sessionId (stdio transports — Claude Desktop, Cursor, etc.) + * share a single 'anon' bucket so the budget actually limits runaway tool use; + * giving each anon call a fresh UUID would defeat the purpose of the guard. + * `provardx_ping` is excluded from wrapping at the call site in server.ts. 
+ */ +export function wrapWithDepthGuard( + toolName: string, + handler: AnyToolCallback, + state: DepthGuardState, + limit: number +): AnyToolCallback { + return async (args, extra) => { + const sessionId = extra.sessionId ?? 'anon'; + const entry = getOrCreateEntry(state, sessionId); + + if (entry.calls >= limit) { + const payload = { + error: 'TOOL_BUDGET_EXCEEDED', + callsMade: entry.calls, + limit, + suggestion: 'Summarize progress and return control to the user.', + }; + const response: ToolResult = { + isError: true, + content: [{ type: 'text' as const, text: JSON.stringify(payload) }], + structuredContent: payload, + }; + return attachMeta(response, toolName, 'standard', entry.totalEstimatedTokens); + } + + entry.calls++; + const result = await handler(args, extra); + + if (process.env['PROVAR_MCP_EMIT_TOKEN_META'] === 'true') { + entry.totalEstimatedTokens += estimateTokens(result); + } + + const detailLevel = typeof args['detail'] === 'string' ? args['detail'] : 'standard'; + return attachMeta(result, toolName, detailLevel); + }; +} + +// --------------------------------------------------------------------------- // +// PDX-475 — Token meta attachment (PROVAR_MCP_EMIT_TOKEN_META) +// --------------------------------------------------------------------------- // + +export function estimateTokens(payload: unknown): number { + return Math.ceil(JSON.stringify(payload).length / 4); +} + +/** + * Appends a `_meta` key to `structuredContent` when PROVAR_MCP_EMIT_TOKEN_META=true. + * The `content[0].text` string is intentionally left unchanged — LLMs read that + * field, so including meta there would waste tokens on observability data. + * + * @param sessionTotalTokens - Cumulative estimated tokens for the session, + * included only on TOOL_BUDGET_EXCEEDED errors. 
+ */ +export function attachMeta( + response: ToolResult, + toolName: string, + detailLevel: string, + sessionTotalTokens?: number +): ToolResult { + if (process.env['PROVAR_MCP_EMIT_TOKEN_META'] !== 'true') return response; + + const meta: Record = { + tool: toolName, + detailLevel, + estimatedTokens: estimateTokens(response), + }; + + if (sessionTotalTokens !== undefined) { + meta['sessionTotalEstimatedTokens'] = sessionTotalTokens; + } + + const existing = response.structuredContent ?? {}; + return { + ...response, + structuredContent: { ...existing, _meta: meta }, + }; +} diff --git a/src/mcp/utils/validationDiff.ts b/src/mcp/utils/validationDiff.ts new file mode 100644 index 00000000..84106eae --- /dev/null +++ b/src/mcp/utils/validationDiff.ts @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +/* eslint-disable camelcase */ +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { createHash } from 'node:crypto'; + +const MAX_RUNS = 20; +const INDEX_FILE = '.runs.json'; +const DEFAULT_ROOT_NAME = '.provardx'; +const VALIDATION_SUBDIR = 'validation'; + +// ── Public types ────────────────────────────────────────────────────────────── + +export type DiffableViolation = Record; + +export interface DiffResult { + added: DiffableViolation[]; + resolved: DiffableViolation[]; + unchanged_count: number; + run_id: string; +} + +interface RunRecord { + run_id: string; + timestamp: number; + filename: string; + /** + * Hash of `${toolTag}|${context}`. Used by loadBaselineViolations to reject + * a run_id whose context (file path, suite name, etc.) does not match the + * calling context — prevents cross-context diffs. 
Optional for backward + * compatibility with index records written before this field existed; those + * older records are treated as not matching any caller and are effectively + * invalidated within one or two new runs as the FIFO cap evicts them. + */ + context_hash?: string; +} + +interface RunsIndex { + runs: RunRecord[]; +} + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +/** Stable 8-char hash of a string for use in run IDs. */ +function shortHash(input: string): string { + return createHash('sha1').update(input).digest('hex').slice(0, 8); +} + +/** Build a unique key for a violation so additions/resolutions can be detected. */ +function violationKey(v: DiffableViolation): string { + const rule_id = String(v['rule_id'] ?? ''); + const applies_to = Array.isArray(v['applies_to']) + ? (v['applies_to'] as string[]).join(',') + : String(v['applies_to'] ?? ''); + const message = String(v['message'] ?? ''); + return `${rule_id}||${applies_to}||${message}`; +} + +function loadIndex(storageDir: string): RunsIndex { + const indexPath = path.join(storageDir, INDEX_FILE); + try { + return JSON.parse(fs.readFileSync(indexPath, 'utf-8')) as RunsIndex; + } catch { + return { runs: [] }; + } +} + +function saveIndex(storageDir: string, index: RunsIndex): void { + const indexPath = path.join(storageDir, INDEX_FILE); + fs.writeFileSync(indexPath, JSON.stringify(index, null, 2), 'utf-8'); +} + +// ── Public API ──────────────────────────────────────────────────────────────── + +/** + * Compute a stable 8-char context hash for a tool + context pair. Used to + * scope baseline run lookups so that a run_id from context A cannot be diffed + * against context B (different project, different suite, different file). + */ +export function computeContextHash(toolTag: string, context: string): string { + return shortHash(`${toolTag}|${context}`); +} + +/** + * Resolve the validation storage root for a given tool subdir. 
Honors the + * PROVAR_MCP_VALIDATION_DIR env var when set; otherwise falls back to + * `~/.provardx/validation/`. The env override is useful for restricted + * CI/dev environments where the home directory is read-only or shared. + */ +export function resolveValidationDir(subdir: string): string { + const override = process.env['PROVAR_MCP_VALIDATION_DIR']?.trim(); + if (override) return path.join(override, subdir); + return path.join(os.homedir(), DEFAULT_ROOT_NAME, VALIDATION_SUBDIR, subdir); +} + +/** Generate a run ID from a context string (e.g. project path or suite name). */ +export function generateRunId(context: string): string { + const rand = Math.random().toString(36).slice(2, 6); + return `${Date.now()}-${shortHash(context)}-${rand}`; +} + +/** + * Check whether any prior runs exist in the given storage directory. + * Used by calcNextAction to determine the first-run heuristic. + */ +export function hasAnyRun(storageDir: string): boolean { + const index = loadIndex(storageDir); + return index.runs.length > 0; +} + +/** + * Save the current violations as a new run in the storage directory. + * Caps the index at MAX_RUNS by evicting the oldest entry when full. + * Returns the generated run_id. + * + * When `contextHash` is provided, it is recorded alongside the run so that + * `loadBaselineViolations` can reject a baseline_run_id whose context does + * not match the calling context (prevents cross-context diffs). + */ +export function saveRun( + storageDir: string, + runId: string, + violations: DiffableViolation[], + contextHash?: string +): string { + fs.mkdirSync(storageDir, { recursive: true }); + + const filename = `${runId}.json`; + fs.writeFileSync(path.join(storageDir, filename), JSON.stringify(violations), 'utf-8'); + + const index = loadIndex(storageDir); + index.runs.push({ + run_id: runId, + timestamp: Date.now(), + filename, + ...(contextHash ? 
{ context_hash: contextHash } : {}), + }); + + // Evict oldest entries when over the cap + while (index.runs.length > MAX_RUNS) { + const evicted = index.runs.shift(); + if (evicted) { + try { + fs.unlinkSync(path.join(storageDir, evicted.filename)); + } catch { + /* best-effort eviction */ + } + } + } + + saveIndex(storageDir, index); + return runId; +} + +/** + * Load the violations array for a given baseline run ID. + * Returns null if the run is not found in the index (BASELINE_NOT_FOUND). + * The filename is looked up from the index only — the run_id itself is never + * used to construct a file path, preventing path traversal. + * + * When `expectedContextHash` is provided, the record's `context_hash` must + * match. Records without a `context_hash` (written by older versions before + * H3) are treated as a mismatch and are effectively retired within one or + * two new runs as the FIFO cap evicts them. This guard prevents diffing a + * baseline from a different file/suite/project against the current context. + */ +export function loadBaselineViolations( + storageDir: string, + baselineRunId: string, + expectedContextHash?: string +): DiffableViolation[] | null { + const index = loadIndex(storageDir); + const record = index.runs.find((r) => r.run_id === baselineRunId); + if (!record) return null; + + if (expectedContextHash !== undefined && record.context_hash !== expectedContextHash) { + return null; + } + + // Use the filename from the index, not the run_id + try { + const content = fs.readFileSync(path.join(storageDir, record.filename), 'utf-8'); + return JSON.parse(content) as DiffableViolation[]; + } catch { + return null; + } +} + +/** + * Compute the diff between a baseline and current violations array. + * Uses (rule_id + applies_to + full message) as the unique key. + * Duplicate violations (same key, multiple occurrences) are treated as + * distinct entries — each occurrence is counted separately (multiset semantics). 
+ */ +export function computeDiff(baseline: DiffableViolation[], current: DiffableViolation[]): Omit { + // Build multiset counts keyed by violation identity + const baselineCounts = new Map(); + for (const v of baseline) { + const key = violationKey(v); + const entry = baselineCounts.get(key); + if (entry) { + entry.count++; + } else { + baselineCounts.set(key, { count: 1, sample: v }); + } + } + + const currentCounts = new Map(); + for (const v of current) { + const key = violationKey(v); + const entry = currentCounts.get(key); + if (entry) { + entry.count++; + } else { + currentCounts.set(key, { count: 1, sample: v }); + } + } + + const added: DiffableViolation[] = []; + const resolved: DiffableViolation[] = []; + let unchanged_count = 0; + + // Tally additions: occurrences in current that exceed baseline count + for (const [key, { count: curr, sample }] of currentCounts) { + const base = baselineCounts.get(key)?.count ?? 0; + unchanged_count += Math.min(base, curr); + const addedCount = curr - base; + for (let i = 0; i < addedCount; i++) added.push(sample); + } + + // Tally resolutions: occurrences in baseline that exceed current count + for (const [key, { count: base, sample }] of baselineCounts) { + const curr = currentCounts.get(key)?.count ?? 0; + const resolvedCount = base - Math.min(base, curr); + for (let i = 0; i < resolvedCount; i++) resolved.push(sample); + } + + return { added, resolved, unchanged_count }; +} diff --git a/src/mcp/utils/validationScore.ts b/src/mcp/utils/validationScore.ts new file mode 100644 index 00000000..7c239b24 --- /dev/null +++ b/src/mcp/utils/validationScore.ts @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. 
+ * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +export type NextAction = 'stop' | 'fix_and_revalidate' | 'inspect_failures'; + +/** Fraction of passing tests expressed as 0–100 integer. Returns 0 when total is 0. */ +export function calcCompletenessScore(passing: number, total: number): number { + if (total === 0) return 0; + return Math.round((passing / total) * 100); +} + +/** + * Recommend what the agent should do next based on the completeness score, + * remaining violation count, and whether any prior runs exist on disk. + * + * - `stop` → score is 100 AND no violations remain + * - `inspect_failures` → first run (no baseline on disk) — review what's failing before trying to fix + * - `fix_and_revalidate`→ subsequent run — agent knows the failure set, should fix and re-run + * + * The secondary `remainingViolationCount` check prevents `stop` from firing when all + * tests pass but quality or best-practice violations are still present. + */ +export function calcNextAction(score: number, hasBaseline: boolean, remainingViolationCount = 0): NextAction { + if (score === 100 && remainingViolationCount === 0) return 'stop'; + if (!hasBaseline) return 'inspect_failures'; + return 'fix_and_revalidate'; +} diff --git a/test/unit/mcp/connectionTools.test.ts b/test/unit/mcp/connectionTools.test.ts index cf5ec3a6..6dbecb82 100644 --- a/test/unit/mcp/connectionTools.test.ts +++ b/test/unit/mcp/connectionTools.test.ts @@ -53,27 +53,62 @@ function writeTestProject(dir: string, content: string): void { // ── .testproject fixture content ────────────────────────────────────────────── +// Mirrors the real .testproject XML shape: +// connectionClass → connections → connection → connectionUrls → connectionUrl +// environment → associations → association[@connectionId] +// The pre-PDX-478 fixture used a flattened shape that did not exist in real +// projects, which is how the parser bugs slipped through CI. 
const BASIC_TEST_PROJECT = ` - - - - + + + + + + + + + + + + + - - + + + + + + + - - + + + + + + + - - + + + + + + + + + + + + + `; @@ -151,25 +186,53 @@ describe('provar_connection_list', () => { assert.equal(sfConn['sso_configured'], false); }); - it('returns environments with name, connection, and url', () => { + it('resolves environment.connection via associations[@connectionId]', () => { writeTestProject(tmpDir, BASIC_TEST_PROJECT); const result = server.call('provar_connection_list', { project_path: tmpDir }); const environments = parseText(result)['environments'] as Array>; assert.ok(Array.isArray(environments)); - assert.equal(environments.length, 2); + assert.equal(environments.length, 3); const qa = environments.find((e) => e['name'] === 'QA'); assert.ok(qa); assert.equal(qa['connection'], 'MyOrg'); - assert.equal(qa['url'], 'https://qa.example.com'); }); - it('returns environment without url when not present', () => { + it('returns environment-specific url when a connectionUrl has @envId matching env @guid', () => { + writeTestProject(tmpDir, BASIC_TEST_PROJECT); + const result = server.call('provar_connection_list', { project_path: tmpDir }); + const environments = parseText(result)['environments'] as Array>; + const qa = environments.find((e) => e['name'] === 'QA'); + assert.ok(qa); + assert.equal(qa['url'], 'sfdc://user@example.com.qa;environment=SANDBOX'); + }); + + it('omits url on environment when no per-env connectionUrl exists', () => { writeTestProject(tmpDir, BASIC_TEST_PROJECT); const result = server.call('provar_connection_list', { project_path: tmpDir }); const environments = parseText(result)['environments'] as Array>; const uat = environments.find((e) => e['name'] === 'UAT'); assert.ok(uat); - assert.equal(uat['url'], undefined, 'UAT has no url attribute'); + assert.equal(uat['url'], undefined, 'UAT has no @envId-matched connectionUrl'); + }); + + it("handles environments with empty gracefully (no crash, connection='')", () => { + 
writeTestProject(tmpDir, BASIC_TEST_PROJECT); + const result = server.call('provar_connection_list', { project_path: tmpDir }); + assert.equal(isError(result), false); + const environments = parseText(result)['environments'] as Array>; + const noAssoc = environments.find((e) => e['name'] === 'NoAssoc'); + assert.ok(noAssoc); + assert.equal(noAssoc['connection'], ''); + assert.equal(noAssoc['url'], undefined); + }); + + it('connection.url uses the default connectionUrl (entry without @envId)', () => { + writeTestProject(tmpDir, BASIC_TEST_PROJECT); + const result = server.call('provar_connection_list', { project_path: tmpDir }); + const connections = parseText(result)['connections'] as Array>; + const myOrg = connections.find((c) => c['name'] === 'MyOrg'); + assert.ok(myOrg); + assert.equal(myOrg['url'], 'sfdc://user@example.com;environment=SANDBOX'); }); it('returns summary with correct counts', () => { @@ -177,7 +240,7 @@ describe('provar_connection_list', () => { const result = server.call('provar_connection_list', { project_path: tmpDir }); const summary = parseText(result)['summary'] as Record; assert.equal(summary['connection_count'], 4); - assert.equal(summary['environment_count'], 2); + assert.equal(summary['environment_count'], 3); }); it('returns empty arrays for project with no connections or environments', () => { @@ -190,6 +253,59 @@ describe('provar_connection_list', () => { }); }); + describe('fields param (sparse field masking)', () => { + it('retains only specified top-level keys when fields is provided', () => { + writeTestProject(tmpDir, BASIC_TEST_PROJECT); + const result = server.call('provar_connection_list', { + project_path: tmpDir, + fields: 'connections,summary', + }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok('connections' in body, 'connections should be retained'); + assert.ok('summary' in body, 'summary should be retained'); + assert.ok(!('environments' in body), 'environments should be masked 
out'); + assert.ok(!('requestId' in body), 'requestId should be masked out'); + }); + + it('omitting fields returns the full response', () => { + writeTestProject(tmpDir, BASIC_TEST_PROJECT); + const result = server.call('provar_connection_list', { project_path: tmpDir }); + const body = parseText(result); + assert.ok('connections' in body); + assert.ok('environments' in body); + assert.ok('requestId' in body); + }); + + it('silently ignores unknown field names', () => { + writeTestProject(tmpDir, BASIC_TEST_PROJECT); + const result = server.call('provar_connection_list', { + project_path: tmpDir, + fields: 'connections,ghost_field', + }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok('connections' in body); + assert.ok(!('ghost_field' in body)); + }); + + it('supports dot notation to narrow connection entries', () => { + writeTestProject(tmpDir, BASIC_TEST_PROJECT); + const result = server.call('provar_connection_list', { + project_path: tmpDir, + fields: 'connections.name,connections.type', + }); + assert.equal(isError(result), false); + const body = parseText(result); + const connections = body['connections'] as Array>; + assert.ok(Array.isArray(connections)); + assert.ok('name' in connections[0], 'name should be retained'); + assert.ok('type' in connections[0], 'type should be retained'); + assert.ok(!('url' in connections[0]), 'url should be masked out'); + assert.ok(!('sso_configured' in connections[0]), 'sso_configured should be masked out'); + }); + }); + describe('error cases', () => { it('returns CONNECTION_FILE_NOT_FOUND when .testproject is missing', () => { const result = server.call('provar_connection_list', { project_path: tmpDir }); diff --git a/test/unit/mcp/detailLevel.test.ts b/test/unit/mcp/detailLevel.test.ts new file mode 100644 index 00000000..46c48e9c --- /dev/null +++ b/test/unit/mcp/detailLevel.test.ts @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. 
+ * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +/* eslint-disable camelcase */ +import { strict as assert } from 'node:assert'; +import { describe, it } from 'mocha'; +import { applyDetailLevel } from '../../../src/mcp/utils/detailLevel.js'; + +const SAMPLE = { + requestId: 'req-1', + name: 'MySuite', + quality_score: 90, + issues: [{ rule_id: 'RULE-001', message: 'Missing doc' }], + run_id: 'run-123', + completeness_score: 100, + recommended_next_action: 'stop', +}; + +const SUMMARY_FIELDS = [ + 'requestId', + 'name', + 'quality_score', + 'run_id', + 'completeness_score', + 'recommended_next_action', +]; + +describe('applyDetailLevel', () => { + it('summary — retains only summaryFields keys', () => { + const result = applyDetailLevel(SAMPLE, 'summary', SUMMARY_FIELDS); + assert.deepEqual(Object.keys(result).sort(), SUMMARY_FIELDS.slice().sort()); + assert.ok(!('issues' in result), 'issues should be excluded from summary'); + }); + + it('summary — preserves values for included keys', () => { + const result = applyDetailLevel(SAMPLE, 'summary', SUMMARY_FIELDS); + assert.equal(result['requestId'], 'req-1'); + assert.equal(result['quality_score'], 90); + assert.equal(result['recommended_next_action'], 'stop'); + }); + + it('standard — returns data unchanged', () => { + const result = applyDetailLevel(SAMPLE, 'standard', SUMMARY_FIELDS); + assert.deepEqual(result, SAMPLE); + }); + + it('full — returns data unchanged', () => { + const result = applyDetailLevel(SAMPLE, 'full', SUMMARY_FIELDS); + assert.deepEqual(result, SAMPLE); + }); + + it('summary with empty summaryFields returns empty object', () => { + const result = applyDetailLevel(SAMPLE, 'summary', []); + assert.deepEqual(result, {}); + }); + + it('summary with a field absent from data is silently skipped', () => { + const result = applyDetailLevel({ a: 1 }, 'summary', ['a', 'missing_key']); + 
assert.deepEqual(result, { a: 1 }); + }); + + it('standard returns the same object reference as input', () => { + const data: Record = { x: 1 }; + const result = applyDetailLevel(data, 'standard', []); + assert.strictEqual(result, data); + }); +}); diff --git a/test/unit/mcp/fieldMask.test.ts b/test/unit/mcp/fieldMask.test.ts new file mode 100644 index 00000000..5e6e86ee --- /dev/null +++ b/test/unit/mcp/fieldMask.test.ts @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +import { strict as assert } from 'node:assert'; +import { describe, it } from 'mocha'; +import { maskFields, parseFieldsParam } from '../../../src/mcp/utils/fieldMask.js'; + +// ── maskFields ──────────────────────────────────────────────────────────────── + +describe('maskFields', () => { + describe('top-level field selection', () => { + it('retains only the specified top-level keys', () => { + const obj = { id: '1', name: 'Test', status: 'PASS', steps: [{ action: 'click' }] }; + const result = maskFields(obj, ['id', 'name']) as Record; + assert.deepEqual(result, { id: '1', name: 'Test' }); + }); + + it('silently ignores unknown field names', () => { + const obj = { id: '1', name: 'Test' }; + const result = maskFields(obj, ['id', 'nonexistent']) as Record; + assert.deepEqual(result, { id: '1' }); + }); + + it('returns empty object when all fields are unknown', () => { + const obj = { id: '1', name: 'Test' }; + const result = maskFields(obj, ['foo', 'bar']) as Record; + assert.deepEqual(result, {}); + }); + }); + + describe('dot notation for nested fields', () => { + it('retains the parent key with only specified sub-fields', () => { + const obj = { steps: [{ action: 'click', element: 'button', wait: 500 }] }; + const result = maskFields(obj, ['steps.action']) as Record; + const steps = 
result['steps'] as Array>; + assert.ok(Array.isArray(steps)); + assert.deepEqual(steps[0], { action: 'click' }); + }); + + it('supports multiple dot-notation paths under the same parent', () => { + const obj = { steps: [{ action: 'click', element: 'button', wait: 500 }] }; + const result = maskFields(obj, ['steps.action', 'steps.element']) as Record; + const steps = result['steps'] as Array>; + assert.deepEqual(steps[0], { action: 'click', element: 'button' }); + }); + + it('mixes top-level and dot-notation fields', () => { + const obj = { id: '1', name: 'Test', steps: [{ action: 'click', wait: 500 }] }; + const result = maskFields(obj, ['id', 'steps.action']) as Record; + assert.equal(result['id'], '1'); + const steps = result['steps'] as Array>; + assert.deepEqual(steps[0], { action: 'click' }); + }); + + it('silently ignores unknown dot-notation sub-fields', () => { + const obj = { steps: [{ action: 'click' }] }; + const result = maskFields(obj, ['steps.action', 'steps.ghost']) as Record; + const steps = result['steps'] as Array>; + assert.deepEqual(steps[0], { action: 'click' }); + }); + }); + + describe('array handling', () => { + it('applies masking to every element of a top-level array', () => { + const arr = [ + { name: 'A', type: 'sf', extra: true }, + { name: 'B', type: 'ui', extra: false }, + ]; + const result = maskFields(arr, ['name', 'type']) as Array>; + assert.equal(result.length, 2); + assert.deepEqual(result[0], { name: 'A', type: 'sf' }); + assert.deepEqual(result[1], { name: 'B', type: 'ui' }); + }); + + it('handles empty arrays without error', () => { + const result = maskFields([], ['name']); + assert.deepEqual(result, []); + }); + }); + + describe('edge cases', () => { + it('passes through primitive values unchanged', () => { + assert.equal(maskFields('hello', ['x']), 'hello'); + assert.equal(maskFields(42, ['x']), 42); + assert.equal(maskFields(null, ['x']), null); + }); + + it('handles objects with numeric or boolean values', () => { + 
const obj = { count: 5, active: true, name: 'Test' }; + const result = maskFields(obj, ['count', 'active']) as Record; + assert.deepEqual(result, { count: 5, active: true }); + }); + + it('handles a field that exists but has a null value', () => { + const obj = { id: '1', extra: null }; + const result = maskFields(obj, ['extra']) as Record; + assert.deepEqual(result, { extra: null }); + }); + }); +}); + +// ── parseFieldsParam ────────────────────────────────────────────────────────── + +describe('parseFieldsParam', () => { + it('returns null when undefined', () => { + assert.equal(parseFieldsParam(undefined), null); + }); + + it('returns null for blank string', () => { + assert.equal(parseFieldsParam(''), null); + assert.equal(parseFieldsParam(' '), null); + }); + + it('trims whitespace around entries', () => { + const result = parseFieldsParam('id , name , status'); + assert.deepEqual(result, ['id', 'name', 'status']); + }); + + it('filters out empty tokens from trailing commas', () => { + const result = parseFieldsParam('id,name,'); + assert.deepEqual(result, ['id', 'name']); + }); + + it('returns a single-item array for one field', () => { + assert.deepEqual(parseFieldsParam('name'), ['name']); + }); + + it('preserves dot notation intact', () => { + const result = parseFieldsParam('connections.name,connections.type'); + assert.deepEqual(result, ['connections.name', 'connections.type']); + }); +}); diff --git a/test/unit/mcp/guidePrompts.test.ts b/test/unit/mcp/guidePrompts.test.ts new file mode 100644 index 00000000..c11e190c --- /dev/null +++ b/test/unit/mcp/guidePrompts.test.ts @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. 
+ * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +import { strict as assert } from 'node:assert'; +import { describe, it, beforeEach } from 'mocha'; +import { + registerOnboardingPrompt, + registerTroubleshootPrompt, + registerOrchestrationPrompt, +} from '../../../src/mcp/prompts/guidePrompts.js'; + +// ── Minimal McpServer mock ───────────────────────────────────────────────────── + +type PromptHandler = (args: Record) => { + messages: Array<{ role: string; content: { type: string; text: string } }>; +}; + +interface PromptRegistration { + name: string; + description: string; + handler: PromptHandler; +} + +class MockMcpServer { + public registrations: PromptRegistration[] = []; + + public prompt(name: string, description: string, _schema: unknown, handler: PromptHandler): void { + this.registrations.push({ name, description, handler }); + } + + public call(name: string, args: Record): ReturnType { + const reg = this.registrations.find((r) => r.name === name); + if (!reg) throw new Error(`Prompt not registered: ${name}`); + return reg.handler(args); + } +} + +function getMessageText(result: ReturnType): string { + assert.ok(result.messages.length > 0, 'Expected at least one message'); + assert.equal(result.messages[0].role, 'user'); + assert.equal(result.messages[0].content.type, 'text'); + return result.messages[0].content.text; +} + +// ── Tests ────────────────────────────────────────────────────────────────────── + +let server: MockMcpServer; + +beforeEach(() => { + server = new MockMcpServer(); + registerOnboardingPrompt(server as never); + registerTroubleshootPrompt(server as never); + registerOrchestrationPrompt(server as never); +}); + +describe('guidePrompts — registration', () => { + it('registers all 3 guide prompts', () => { + assert.equal(server.registrations.length, 3); + }); + + it('registers provar.guide.onboarding', () => { + const reg = server.registrations.find((r) => r.name 
=== 'provar.guide.onboarding'); + assert.ok(reg, 'provar.guide.onboarding should be registered'); + }); + + it('registers provar.guide.troubleshoot', () => { + const reg = server.registrations.find((r) => r.name === 'provar.guide.troubleshoot'); + assert.ok(reg, 'provar.guide.troubleshoot should be registered'); + }); + + it('registers provar.guide.orchestration', () => { + const reg = server.registrations.find((r) => r.name === 'provar.guide.orchestration'); + assert.ok(reg, 'provar.guide.orchestration should be registered'); + }); +}); + +// ── Regression guard: the PDX-481 single-call construction copy ──────────────── +// These assertions protect the canonical phrasing that fixes PDX-479. If you +// rewrite the author-test flow in guidePrompts.ts, you MUST keep equivalent +// guidance — otherwise the 1.5.0 regression returns. + +describe('guidePrompts — author-test flow (PDX-481 regression guard)', () => { + it('author-test flow recommends single-call construction', () => { + const text = getMessageText(server.call('provar.guide.orchestration', { task: 'author-test' })); + assert.ok( + text.includes('single call') || text.includes('one call') || text.includes('in one payload'), + 'author-test flow must recommend single-call construction (search: "single call" / "one call" / "in one payload")' + ); + assert.ok( + text.includes('ALL steps') || text.includes('full step tree') || text.includes('full tree'), + 'author-test flow must call out passing the full step tree at once' + ); + }); + + it('author-test flow does NOT recommend per-step construction', () => { + const text = getMessageText(server.call('provar.guide.orchestration', { task: 'author-test' })); + assert.ok( + !text.includes('repeat per step'), + 'author-test flow must not say "repeat per step" — that pattern caused PDX-479' + ); + // Unconditional check — the old OR-clause "|| text.includes('amend')" short-circuited to pass + // (because "amend" appears repeatedly elsewhere in the flow), so it 
provided no real protection + // against the "repeat as needed" phrasing being reintroduced. + assert.ok( + !text.includes('repeat as needed'), + 'author-test flow must not say "repeat as needed" — that pattern caused PDX-479' + ); + }); + + it('author-test flow marks step_edit as amendment-only', () => { + const text = getMessageText(server.call('provar.guide.orchestration', { task: 'author-test' })); + assert.ok( + text.includes('amend') || text.includes('Amend') || text.includes('AMENDING'), + 'author-test flow must mark provar_testcase_step_edit as for amending existing test cases' + ); + }); +}); + +describe('guidePrompts — orchestration general flow (PDX-481 regression guard)', () => { + it('prerequisite graph splits generate and step_edit into distinct entry points', () => { + const text = getMessageText(server.call('provar.guide.orchestration', {})); + // The pre-fix string was: "provar_testcase_generate OR provar_testcase_step_edit" + // The post-fix split lists them on separate lines with distinct annotations. + assert.ok( + !text.includes('provar_testcase_generate OR provar_testcase_step_edit'), + 'prerequisite graph must not equate generate and step_edit — they have different purposes' + ); + // Bounded regex tied to the exact annotation punctuation used in the prompt body — + // "provar_testcase_generate (construct …" / "provar_testcase_step_edit (amend …". + // Bounding the gap to ≤8 chars (i.e. the single " (" that should appear before the + // annotation) avoids the loose-`[^\n]*` false-positive where unrelated tokens between + // the two words on the same line would still match. 
+ assert.ok( + /provar_testcase_generate\s*\(construct/i.test(text), + 'prerequisite graph must annotate provar_testcase_generate as the construct entry point' + ); + assert.ok( + /provar_testcase_step_edit\s*\(amend/i.test(text), + 'prerequisite graph must annotate provar_testcase_step_edit as the amend entry point' + ); + }); +}); diff --git a/test/unit/mcp/projectInspect.test.ts b/test/unit/mcp/projectInspect.test.ts new file mode 100644 index 00000000..f6be6349 --- /dev/null +++ b/test/unit/mcp/projectInspect.test.ts @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +/* eslint-disable camelcase */ +import { strict as assert } from 'node:assert'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { describe, it, beforeEach, afterEach } from 'mocha'; +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { registerProjectInspect } from '../../../src/mcp/tools/projectInspect.js'; +import type { ServerConfig } from '../../../src/mcp/server.js'; + +// ── Minimal McpServer mock ──────────────────────────────────────────────────── + +type ToolHandler = (args: Record) => unknown; + +class MockMcpServer { + private handlers = new Map(); + + public registerTool(name: string, _config: unknown, handler: ToolHandler): void { + this.handlers.set(name, handler); + } + + public call(name: string, args: Record): ReturnType { + const h = this.handlers.get(name); + if (!h) throw new Error(`Tool not registered: ${name}`); + return h(args); + } +} + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +function parseText(result: unknown): Record { + const r = result as { content: Array<{ type: string; text: string }> }; + return JSON.parse(r.content[0].text) as Record; +} + +function 
isError(result: unknown): boolean { + return (result as { isError?: boolean }).isError === true; +} + +// ── Test setup ──────────────────────────────────────────────────────────────── + +let tmpDir: string; +let server: MockMcpServer; +let config: ServerConfig; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'inspect-test-')); + server = new MockMcpServer(); + config = { allowedPaths: [tmpDir] }; + registerProjectInspect(server as unknown as McpServer, config); +}); + +afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ── provar_project_inspect — detail param ───────────────────────────────────── + +describe('provar_project_inspect — detail param', () => { + it('standard (default) returns all top-level fields including test_case_files', () => { + const result = server.call('provar_project_inspect', { project_path: tmpDir }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok('test_case_files' in body, 'standard should include test_case_files'); + assert.ok('summary' in body, 'standard should include summary'); + assert.ok('requestId' in body, 'standard should include requestId'); + }); + + it('summary retains only requestId, project_path, provar_home, and summary', () => { + const result = server.call('provar_project_inspect', { project_path: tmpDir, detail: 'summary' }); + assert.equal(isError(result), false); + const body = parseText(result); + const keys = Object.keys(body); + assert.ok(keys.includes('requestId'), 'summary must include requestId'); + assert.ok(keys.includes('project_path'), 'summary must include project_path'); + assert.ok(keys.includes('summary'), 'summary must include summary'); + assert.ok(!keys.includes('test_case_files'), 'summary must not include test_case_files'); + assert.ok(!keys.includes('ant_build_files'), 'summary must not include ant_build_files'); + assert.ok(!keys.includes('test_project'), 'summary must not include test_project'); + }); + + 
it('full returns all fields (same as standard for this tool)', () => { + const resultFull = server.call('provar_project_inspect', { project_path: tmpDir, detail: 'full' }); + const resultStd = server.call('provar_project_inspect', { project_path: tmpDir, detail: 'standard' }); + const full = parseText(resultFull); + const std = parseText(resultStd); + // Both should have the same keys (requestId will differ — compare key sets only) + assert.deepEqual(Object.keys(full).sort(), Object.keys(std).sort()); + }); + + it('omitting detail defaults to standard behaviour', () => { + const withDefault = server.call('provar_project_inspect', { project_path: tmpDir }); + const withStandard = server.call('provar_project_inspect', { project_path: tmpDir, detail: 'standard' }); + const a = Object.keys(parseText(withDefault)).sort(); + const b = Object.keys(parseText(withStandard)).sort(); + assert.deepEqual(a, b, 'omitting detail should match explicit standard'); + }); +}); + +// ── provar_project_inspect — fields param ───────────────────────────────────── + +describe('provar_project_inspect — fields param', () => { + it('retains only specified top-level keys', () => { + const result = server.call('provar_project_inspect', { + project_path: tmpDir, + fields: 'test_case_files,summary', + }); + const body = parseText(result); + assert.ok('test_case_files' in body); + assert.ok('summary' in body); + assert.ok(!('requestId' in body), 'requestId should be masked out'); + assert.ok(!('test_project' in body), 'test_project should be masked out'); + }); + + it('omitting fields returns full response', () => { + const result = server.call('provar_project_inspect', { project_path: tmpDir }); + const body = parseText(result); + assert.ok('requestId' in body); + assert.ok('summary' in body); + }); + + it('silently ignores unknown field names', () => { + const result = server.call('provar_project_inspect', { + project_path: tmpDir, + fields: 'summary,ghost_field', + }); + 
assert.equal(isError(result), false); + const body = parseText(result); + assert.ok('summary' in body); + assert.ok(!('ghost_field' in body)); + }); + + it('supports dot notation for nested field selection', () => { + const result = server.call('provar_project_inspect', { + project_path: tmpDir, + fields: 'summary.test_case_count,summary.coverage_percent', + }); + assert.equal(isError(result), false); + const body = parseText(result); + const summary = body['summary'] as Record; + assert.ok('test_case_count' in summary, 'test_case_count should be retained'); + assert.ok('coverage_percent' in summary, 'coverage_percent should be retained'); + assert.ok(!('provardx_properties_count' in summary), 'unspecified summary keys should be dropped'); + }); + + it('composes detail=summary with fields for fine-grained trimming', () => { + const result = server.call('provar_project_inspect', { + project_path: tmpDir, + detail: 'summary', + fields: 'summary', + }); + const body = parseText(result); + assert.ok('summary' in body); + assert.ok(!('requestId' in body), 'fields filter should further narrow after detail'); + }); +}); + +// ── provar_project_inspect — path-policy errors (unchanged) ─────────────────── + +describe('provar_project_inspect — path policy', () => { + it('returns PATH_NOT_ALLOWED when project_path is outside allowed paths', () => { + const strictServer = new MockMcpServer(); + registerProjectInspect(strictServer as unknown as McpServer, { allowedPaths: [tmpDir] }); + const result = strictServer.call('provar_project_inspect', { + project_path: path.join(os.tmpdir(), 'some-other-project'), + }); + assert.equal(isError(result), true); + const code = parseText(result)['error_code'] as string; + assert.ok(code === 'PATH_NOT_ALLOWED' || code === 'PATH_TRAVERSAL', `Unexpected code: ${code}`); + }); + + it('returns PATH_NOT_FOUND when project path does not exist', () => { + const result = server.call('provar_project_inspect', { + project_path: path.join(tmpDir, 
'nonexistent-dir'), + }); + assert.equal(isError(result), true); + assert.equal(parseText(result)['error_code'], 'PATH_NOT_FOUND'); + }); +}); diff --git a/test/unit/mcp/projectValidateFromPath.test.ts b/test/unit/mcp/projectValidateFromPath.test.ts index 687d1680..7012bff1 100644 --- a/test/unit/mcp/projectValidateFromPath.test.ts +++ b/test/unit/mcp/projectValidateFromPath.test.ts @@ -445,4 +445,142 @@ describe('provar_project_validate (from path)', () => { ); }); }); + + describe('PDX-470 — detail level', () => { + it('standard response includes quality_score and completeness_score', () => { + makeProject(tmpDir); + const result = server.call('provar_project_validate', { project_path: tmpDir, save_results: false }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok('quality_score' in body, 'standard should include quality_score'); + assert.ok('completeness_score' in body, 'standard should include completeness_score'); + assert.ok('recommended_next_action' in body, 'standard should include recommended_next_action'); + }); + + it('summary response includes only key fields, not violation details', () => { + makeProject(tmpDir); + const result = server.call('provar_project_validate', { + project_path: tmpDir, + save_results: false, + detail: 'summary', + }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok('quality_score' in body, 'summary should include quality_score'); + assert.ok('completeness_score' in body, 'summary should include completeness_score'); + assert.ok('recommended_next_action' in body, 'summary should include recommended_next_action'); + assert.ok(!('project_violations_by_rule' in body), 'summary should NOT include project_violations_by_rule'); + assert.ok(!('plans_summary' in body), 'summary should NOT include plans_summary'); + }); + }); + + describe('PDX-471 — run_id and baseline_run_id diff mode', () => { + it('run_id is present when save_results=true (default)', () => { + 
makeProject(tmpDir); + const result = server.call('provar_project_validate', { project_path: tmpDir }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok(typeof body['run_id'] === 'string' && body['run_id'].length > 0, 'run_id should be a non-empty string'); + }); + + it('run_id is absent when save_results=false', () => { + makeProject(tmpDir); + const result = server.call('provar_project_validate', { project_path: tmpDir, save_results: false }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok(!('run_id' in body), 'run_id should not be present when save_results=false'); + }); + + it('returns BASELINE_NOT_FOUND for an unknown baseline_run_id', () => { + makeProject(tmpDir); + const result = server.call('provar_project_validate', { + project_path: tmpDir, + baseline_run_id: 'nonexistent-run-id-xyz', + }); + assert.equal(isError(result), true); + const body = parseText(result); + assert.equal(body['error_code'], 'BASELINE_NOT_FOUND'); + }); + + it('diff mode returns added/resolved/unchanged_count when baseline exists', () => { + makeProject(tmpDir); + const first = server.call('provar_project_validate', { project_path: tmpDir }); + assert.equal(isError(first), false); + const firstBody = parseText(first); + const runId = firstBody['run_id'] as string; + + const second = server.call('provar_project_validate', { + project_path: tmpDir, + baseline_run_id: runId, + }); + assert.equal(isError(second), false); + const diffBody = parseText(second); + assert.ok('added' in diffBody, 'diff should include added'); + assert.ok('resolved' in diffBody, 'diff should include resolved'); + assert.ok('unchanged_count' in diffBody, 'diff should include unchanged_count'); + assert.ok('run_id' in diffBody, 'diff should include run_id'); + }); + + it('diff response includes completeness_score and recommended_next_action', () => { + makeProject(tmpDir); + const first = server.call('provar_project_validate', { 
project_path: tmpDir }); + const firstBody = parseText(first); + const runId = firstBody['run_id'] as string; + + const second = server.call('provar_project_validate', { + project_path: tmpDir, + baseline_run_id: runId, + }); + const diffBody = parseText(second); + assert.ok('completeness_score' in diffBody, 'diff should include completeness_score'); + assert.ok('recommended_next_action' in diffBody, 'diff should include recommended_next_action'); + }); + + it('returns diff (not BASELINE_NOT_FOUND) when save_results=false and baseline_run_id is set (B4)', () => { + // Read-only diff: callers must be able to compare against an existing + // baseline without persisting the current run. The pre-fix gated baseline + // load on save_results !== false, so a valid baseline returned BASELINE_NOT_FOUND. + makeProject(tmpDir); + const first = server.call('provar_project_validate', { project_path: tmpDir }); + const runId = (parseText(first) as { run_id: string }).run_id; + + const second = server.call('provar_project_validate', { + project_path: tmpDir, + baseline_run_id: runId, + save_results: false, + }); + assert.equal(isError(second), false, 'read-only diff must not error'); + const body = parseText(second); + assert.ok('added' in body, 'read-only diff must include added'); + assert.ok('resolved' in body, 'read-only diff must include resolved'); + assert.ok('unchanged_count' in body, 'read-only diff must include unchanged_count'); + assert.ok(!('run_id' in body), 'read-only diff should NOT include run_id when save_results=false'); + }); + }); + + describe('PDX-473 — stop decision counts all-level violations (B3)', () => { + it('recommended_next_action is NOT stop when nested violations remain at completeness 100', () => { + // The fixture project (makeProject) creates a structurally valid test case + // covered by a plan, yielding test_cases_valid===total. But the project + // typically has plan/suite-level violations (e.g. 
missing plan metadata + // from the bare .planitem). The stop decision must reflect those. + makeProject(tmpDir); + const result = server.call('provar_project_validate', { + project_path: tmpDir, + save_results: false, + }); + assert.equal(isError(result), false); + const body = parseText(result); + if (body['completeness_score'] === 100) { + // If the fixture happens to be 100% complete in completeness terms, the + // stop decision must still account for any nested violations that the + // pre-fix snapshot ignored. + assert.notEqual( + body['recommended_next_action'], + 'stop', + `Expected NOT stop while nested violations remain, got: ${String(body['recommended_next_action'])}` + ); + } + }); + }); }); diff --git a/test/unit/mcp/qualityHubTools.test.ts b/test/unit/mcp/qualityHubTools.test.ts index 11274d2f..edbe4801 100644 --- a/test/unit/mcp/qualityHubTools.test.ts +++ b/test/unit/mcp/qualityHubTools.test.ts @@ -163,6 +163,58 @@ describe('qualityHubTools', () => { }); }); + // ── provar_qualityhub_display — detail + fields ──────────────────────────── + + describe('provar_qualityhub_display — detail param', () => { + it('standard (default) returns requestId, exitCode, stdout, stderr', () => { + spawnStub.returns(makeSpawnResult('display output', '', 0)); + const result = server.call('provar_qualityhub_display', { flags: [] }); + const body = parseBody(result); + assert.ok('requestId' in body); + assert.ok('exitCode' in body); + assert.ok('stdout' in body); + assert.ok('stderr' in body); + }); + + it('summary returns only requestId and exitCode', () => { + spawnStub.returns(makeSpawnResult('display output', '', 0)); + const result = server.call('provar_qualityhub_display', { flags: [], detail: 'summary' }); + const body = parseBody(result); + assert.ok('requestId' in body, 'summary must include requestId'); + assert.ok('exitCode' in body, 'summary must include exitCode'); + assert.ok(!('stdout' in body), 'summary must not include stdout'); + assert.ok(!('stderr' 
in body), 'summary must not include stderr'); + }); + + it('full returns same fields as standard', () => { + spawnStub.returns(makeSpawnResult('display output', '', 0)); + const full = parseBody(server.call('provar_qualityhub_display', { flags: [], detail: 'full' })); + const std = parseBody(server.call('provar_qualityhub_display', { flags: [], detail: 'standard' })); + assert.deepEqual(Object.keys(full).sort(), Object.keys(std).sort()); + }); + }); + + describe('provar_qualityhub_display — fields param', () => { + it('retains only specified keys', () => { + spawnStub.returns(makeSpawnResult('display output', '', 0)); + const result = server.call('provar_qualityhub_display', { flags: [], fields: 'exitCode,stdout' }); + const body = parseBody(result); + assert.ok('exitCode' in body); + assert.ok('stdout' in body); + assert.ok(!('requestId' in body)); + assert.ok(!('stderr' in body)); + }); + + it('silently ignores unknown fields', () => { + spawnStub.returns(makeSpawnResult('ok', '', 0)); + const result = server.call('provar_qualityhub_display', { flags: [], fields: 'exitCode,ghost' }); + assert.equal(isError(result), false); + const body = parseBody(result); + assert.ok('exitCode' in body); + assert.ok(!('ghost' in body)); + }); + }); + // ── provar_qualityhub_testrun ─────────────────────────────────────────────── describe('provar_qualityhub_testrun', () => { @@ -396,6 +448,62 @@ describe('qualityHubTools', () => { }); }); + // ── provar_qualityhub_testcase_retrieve — detail + fields ───────────────────── + + describe('provar_qualityhub_testcase_retrieve — detail param', () => { + it('standard (default) returns requestId, exitCode, stdout, stderr', () => { + spawnStub.returns(makeSpawnResult('[]', '', 0)); + const result = server.call('provar_qualityhub_testcase_retrieve', { target_org: 'myorg', flags: [] }); + const body = parseBody(result); + assert.ok('requestId' in body); + assert.ok('exitCode' in body); + assert.ok('stdout' in body); + assert.ok('stderr' in 
body); + }); + + it('summary returns only requestId and exitCode', () => { + spawnStub.returns(makeSpawnResult('[]', '', 0)); + const result = server.call('provar_qualityhub_testcase_retrieve', { + target_org: 'myorg', + flags: [], + detail: 'summary', + }); + const body = parseBody(result); + assert.ok('requestId' in body, 'summary must include requestId'); + assert.ok('exitCode' in body, 'summary must include exitCode'); + assert.ok(!('stdout' in body), 'summary must not include stdout'); + assert.ok(!('stderr' in body), 'summary must not include stderr'); + }); + }); + + describe('provar_qualityhub_testcase_retrieve — fields param', () => { + it('retains only specified keys', () => { + spawnStub.returns(makeSpawnResult('[]', '', 0)); + const result = server.call('provar_qualityhub_testcase_retrieve', { + target_org: 'myorg', + flags: [], + fields: 'exitCode,stdout', + }); + const body = parseBody(result); + assert.ok('exitCode' in body); + assert.ok('stdout' in body); + assert.ok(!('requestId' in body)); + }); + + it('silently ignores unknown field names', () => { + spawnStub.returns(makeSpawnResult('[]', '', 0)); + const result = server.call('provar_qualityhub_testcase_retrieve', { + target_org: 'myorg', + flags: [], + fields: 'exitCode,nope', + }); + assert.equal(isError(result), false); + const body = parseBody(result); + assert.ok('exitCode' in body); + assert.ok(!('nope' in body)); + }); + }); + // ── sf_path threading ───────────────────────────────────────────────────────── describe('sf_path threading', () => { diff --git a/test/unit/mcp/startupTuning.test.ts b/test/unit/mcp/startupTuning.test.ts new file mode 100644 index 00000000..c16b532e --- /dev/null +++ b/test/unit/mcp/startupTuning.test.ts @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. 
+ * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +/* eslint-disable camelcase */ +import { strict as assert } from 'node:assert'; +import { describe, it, afterEach } from 'mocha'; +import { parseActiveGroups } from '../../../src/mcp/server.js'; +import { desc } from '../../../src/mcp/tools/descHelper.js'; +import { registerTestSuiteValidate } from '../../../src/mcp/tools/testSuiteValidate.js'; +import { registerAllNitroXTools } from '../../../src/mcp/tools/nitroXTools.js'; +import { registerProjectInspect } from '../../../src/mcp/tools/projectInspect.js'; + +// ── Minimal McpServer mock ───────────────────────────────────────────────────── + +type ToolConfig = { title?: string; description: string; inputSchema: unknown }; + +class MockMcpServer { + public readonly registered = new Map(); + + public registerTool(name: string, config: ToolConfig): void { + this.registered.set(name, config); + } +} + +const MOCK_CONFIG = { allowedPaths: ['/tmp'] }; + +// ── PDX-468: desc() helper ──────────────────────────────────────────────────── + +describe('desc() helper (PDX-468)', () => { + afterEach(() => { + delete process.env['PROVAR_MCP_SCHEMA_MODE']; + }); + + it('returns standard string when PROVAR_MCP_SCHEMA_MODE is unset', () => { + assert.equal(desc('standard text', 'compact text'), 'standard text'); + }); + + it('returns compact string when PROVAR_MCP_SCHEMA_MODE=compact', () => { + process.env['PROVAR_MCP_SCHEMA_MODE'] = 'compact'; + assert.equal(desc('standard text', 'compact text'), 'compact text'); + }); + + it('returns standard string for any value other than "compact"', () => { + process.env['PROVAR_MCP_SCHEMA_MODE'] = 'verbose'; + assert.equal(desc('standard text', 'compact text'), 'standard text'); + }); +}); + +// ── PDX-468: compact descriptions in registered tools ───────────────────────── + +describe('compact tool descriptions (PDX-468)', () => { + afterEach(() => { + delete 
process.env['PROVAR_MCP_SCHEMA_MODE']; + }); + + it('registers standard description when PROVAR_MCP_SCHEMA_MODE is unset', () => { + const mock = new MockMcpServer(); + registerTestSuiteValidate(mock as never); + const cfg = mock.registered.get('provar_testsuite_validate'); + assert.ok(cfg, 'provar_testsuite_validate should be registered'); + assert.ok(cfg.description.length > 50, 'standard description should be multi-sentence (>50 chars)'); + assert.ok(cfg.description.includes('checks for empty suites'), 'standard description should include detail text'); + }); + + it('registers compact description when PROVAR_MCP_SCHEMA_MODE=compact', () => { + process.env['PROVAR_MCP_SCHEMA_MODE'] = 'compact'; + const mock = new MockMcpServer(); + registerTestSuiteValidate(mock as never); + const cfg = mock.registered.get('provar_testsuite_validate'); + assert.ok(cfg, 'provar_testsuite_validate should be registered'); + assert.ok( + cfg.description.length <= 100, + `compact description should be short (≤100 chars), got ${cfg.description.length}` + ); + assert.ok( + !cfg.description.includes('checks for empty suites'), + 'compact description should not contain prose detail' + ); + }); + + it('reverts to standard description when PROVAR_MCP_SCHEMA_MODE is unrecognised', () => { + process.env['PROVAR_MCP_SCHEMA_MODE'] = 'verbose'; + const mock = new MockMcpServer(); + registerTestSuiteValidate(mock as never); + const cfg = mock.registered.get('provar_testsuite_validate'); + assert.ok(cfg, 'provar_testsuite_validate should be registered'); + assert.ok(cfg.description.includes('checks for empty suites'), 'should fall back to standard for unknown mode'); + }); +}); + +// ── PDX-469: parseActiveGroups() ────────────────────────────────────────────── + +describe('parseActiveGroups() (PDX-469)', () => { + afterEach(() => { + delete process.env['PROVAR_MCP_TOOLS']; + }); + + it('returns null when env var is unset (all groups active)', () => { + assert.equal(parseActiveGroups(), null); + 
}); + + it('returns null when env var is empty string', () => { + process.env['PROVAR_MCP_TOOLS'] = ''; + assert.equal(parseActiveGroups(), null); + }); + + it('returns null when env var is whitespace only', () => { + process.env['PROVAR_MCP_TOOLS'] = ' '; + assert.equal(parseActiveGroups(), null); + }); + + it('returns a Set with a single group name (lowercased)', () => { + process.env['PROVAR_MCP_TOOLS'] = 'nitroX'; + const groups = parseActiveGroups(); + assert.ok(groups instanceof Set); + assert.equal(groups.size, 1); + assert.ok(groups.has('nitrox')); + }); + + it('returns a Set with multiple group names (lowercased)', () => { + process.env['PROVAR_MCP_TOOLS'] = 'nitroX,validation'; + const groups = parseActiveGroups(); + assert.ok(groups instanceof Set); + assert.equal(groups.size, 2); + assert.ok(groups.has('nitrox')); + assert.ok(groups.has('validation')); + }); + + it('trims whitespace around group names', () => { + process.env['PROVAR_MCP_TOOLS'] = ' nitroX , validation '; + const groups = parseActiveGroups(); + assert.ok(groups instanceof Set); + assert.ok(groups.has('nitrox')); + assert.ok(groups.has('validation')); + }); + + it('ignores empty segments from trailing commas', () => { + process.env['PROVAR_MCP_TOOLS'] = 'nitroX,'; + const groups = parseActiveGroups(); + assert.ok(groups instanceof Set); + assert.equal(groups.size, 1); + assert.ok(groups.has('nitrox')); + }); + + it('returns null when env var is only a comma (no valid group names)', () => { + process.env['PROVAR_MCP_TOOLS'] = ','; + assert.equal(parseActiveGroups(), null); + }); + + it('returns null when env var is only commas (no valid group names)', () => { + process.env['PROVAR_MCP_TOOLS'] = ',,'; + assert.equal(parseActiveGroups(), null); + }); + + // ── H1: unknown group names ─────────────────────────────────────────────── + it('returns null when every requested group name is unknown (typo footgun)', () => { + // Pre-fix: a typo like 'validaton' produced Set{'validaton'} which 
matched + // no group and silently disabled all tools. Now we fall back to null (all + // tools) so the server is never left with an empty Provar tool surface. + process.env['PROVAR_MCP_TOOLS'] = 'validaton'; + assert.equal(parseActiveGroups(), null); + }); + + it('keeps known names and ignores unknown ones in a mixed list', () => { + process.env['PROVAR_MCP_TOOLS'] = 'nitroX,bogusgroup,validation'; + const groups = parseActiveGroups(); + assert.ok(groups instanceof Set); + assert.equal(groups.size, 2); + assert.ok(groups.has('nitrox')); + assert.ok(groups.has('validation')); + assert.ok(!groups.has('bogusgroup')); + }); +}); + +// ── PDX-469: tool profile registration ──────────────────────────────────────── + +describe('tool profile registration (PDX-469)', () => { + afterEach(() => { + delete process.env['PROVAR_MCP_TOOLS']; + }); + + it('registers nitroX tools when profile includes nitrox', () => { + process.env['PROVAR_MCP_TOOLS'] = 'nitroX'; + const mock = new MockMcpServer(); + registerAllNitroXTools(mock as never, MOCK_CONFIG); + assert.ok(mock.registered.has('provar_nitrox_discover'), 'nitrox tools should be registered'); + assert.ok(mock.registered.has('provar_nitrox_generate'), 'nitrox generate should be registered'); + }); + + it('registers inspect tools independently of profile (direct call)', () => { + process.env['PROVAR_MCP_TOOLS'] = 'nitrox'; + const mock = new MockMcpServer(); + registerProjectInspect(mock as never, MOCK_CONFIG); + assert.ok(mock.registered.has('provar_project_inspect')); + }); + + it('provardx_ping group is not in parseActiveGroups — it is always registered separately', () => { + process.env['PROVAR_MCP_TOOLS'] = 'nitrox'; + const groups = parseActiveGroups(); + assert.ok(groups !== null, 'groups should be a Set when PROVAR_MCP_TOOLS is set'); + assert.ok(!groups.has('ping'), 'ping is not a filterable group'); + }); +}); diff --git a/test/unit/mcp/testCaseGenerate.test.ts b/test/unit/mcp/testCaseGenerate.test.ts index 
c6ba33df..95c7579b 100644 --- a/test/unit/mcp/testCaseGenerate.test.ts +++ b/test/unit/mcp/testCaseGenerate.test.ts @@ -21,7 +21,9 @@ import type { ServerConfig } from '../../../src/mcp/server.js'; type ToolHandler = (args: Record) => unknown; class MockMcpServer { - public registrations: Array<{ name: string; description: string }> = []; + // PDX-484: capture `title` alongside `description` so tests can assert on the + // title-level contract. Many MCP clients render only the title field. + public registrations: Array<{ name: string; description: string; title: string }> = []; private handlers = new Map(); public tool(name: string, _description: string, _schema: unknown, handler: ToolHandler): void { @@ -30,8 +32,16 @@ class MockMcpServer { public registerTool(name: string, config: unknown, handler: ToolHandler): void { this.handlers.set(name, handler); - const desc = (config as Record)['description']; - if (typeof desc === 'string') this.registrations.push({ name, description: desc }); + const cfg = config as Record; + const desc = cfg['description']; + const title = cfg['title']; + if (typeof desc === 'string') { + this.registrations.push({ + name, + description: desc, + title: typeof title === 'string' ? title : '', + }); + } } public call(name: string, args: Record): ReturnType { @@ -87,6 +97,140 @@ describe('provar_testcase_generate description', () => { 'description should include step-reference fallback' ); }); + + // ── PDX-482 regression guard: construction contract at the call site ────── + // The PDX-479 regression came from upstream guidance steering agents toward + // multi-call construction. These assertions protect the in-tool contract so + // even if upstream prompts/resources regress again, the LLM reads the + // single-call requirement at every call site. 
+ + it('TOOL_DESCRIPTION carries the single-call construction contract', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.description.includes('Construction pattern'), + 'description must lead with the construction-pattern contract for PDX-479 protection' + ); + assert.ok( + reg.description.includes('single call'), + 'description must say "single call" so the contract is greppable from the call site' + ); + assert.ok(reg.description.includes('FULL step tree'), 'description must instruct passing the FULL step tree'); + }); + + it('TOOL_DESCRIPTION marks step_edit as AMENDING, not constructing', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.description.includes('AMENDING'), + 'description must explicitly say provar_testcase_step_edit is for AMENDING (caps for emphasis at the call site)' + ); + // Use a literal substring match (not a regex) — the previous regex + // /step_edit[^.]*not for CONSTRUCTING|CONSTRUCTING[^.]*not/i had a + // false-positive: the second alternative would pass on hostile text like + // "constructing is the only way... not via generate". Locking on the + // exact canonical phrasing prevents that drift. 
+ assert.ok( + reg.description.includes('not for CONSTRUCTING one from scratch'), + 'description must explicitly say step_edit is "not for CONSTRUCTING one from scratch" (literal canonical phrase)' + ); + }); + + it('TOOL_DESCRIPTION gives stop-and-assemble guidance for the common mistake', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.description.includes('stop and assemble') || reg.description.includes('stop, and assemble'), + 'description must tell agents to stop and assemble the full step list before calling — the most common mistake' + ); + }); + + // ── PDX-482 hardening: leading-position assertion (adversarial review fix) ── + // The contract must appear EARLY in the description because LLMs weight + // earlier tokens more heavily and many MCP clients truncate descriptions. + // Without this guard, a future refactor could move the contract to the end + // of the joined array and every other assertion would still pass. + it('Construction contract appears in the first 200 characters of the description', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered'); + const pos = reg.description.indexOf('Construction pattern'); + assert.ok(pos >= 0, 'description must contain "Construction pattern"'); + assert.ok( + pos < 200, + `"Construction pattern" must appear in the first 200 chars (found at ${pos}) — LLMs weight leading tokens more` + ); + }); + + // ── PDX-484: title-level construct-vs-amend contract ────────────────────── + // Many MCP clients (Claude Desktop tool-picker chips, Cursor audit pane, + // inline tool-call references in chat threads) render only the `title` + // field. Without the contract in the title an agent that reads only that + // surface gets zero PDX-479 protection. 
These assertions lock the title to + // the canonical phrasing chosen during the PDX-484 cross-client pilot. + + it('title carries the single-call construction contract (PDX-484)', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.title.includes('one call') || reg.title.includes('single call'), + 'title must contain "one call" or "single call" so the contract is visible in tool-picker chips' + ); + assert.ok( + /step/i.test(reg.title), + 'title must mention steps so the LLM sees the payload shape at the chip-level surface' + ); + }); + + it('title fits the cross-client chip-render comfort threshold (≤50 chars, PDX-484)', () => { + const reg = server.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered'); + assert.ok( + reg.title.length <= 50, + `title length ${reg.title.length} exceeds 50 chars — Cursor and other clients may truncate` + ); + }); + + // ── PDX-482 hardening: compact-mode coverage (adversarial review fix) ────── + // PROVAR_MCP_SCHEMA_MODE=compact swaps the entire description for a short + // one-liner. Without this guard, compact mode is a regression highway: + // the LLM would see a contract-free description and could fall back to the + // multi-call pattern that caused PDX-479. 
+ describe('compact-mode (PROVAR_MCP_SCHEMA_MODE=compact)', () => { + const ORIGINAL_MODE = process.env['PROVAR_MCP_SCHEMA_MODE']; + let compactServer: MockMcpServer; + + beforeEach(() => { + process.env['PROVAR_MCP_SCHEMA_MODE'] = 'compact'; + compactServer = new MockMcpServer(); + registerTestCaseGenerate(compactServer as never, { allowedPaths: [tmpDir] }); + }); + + afterEach(() => { + if (ORIGINAL_MODE === undefined) { + delete process.env['PROVAR_MCP_SCHEMA_MODE']; + } else { + process.env['PROVAR_MCP_SCHEMA_MODE'] = ORIGINAL_MODE; + } + }); + + it('compact description still carries the single-call construction contract', () => { + const reg = compactServer.registrations.find((r) => r.name === 'provar_testcase_generate'); + assert.ok(reg, 'tool should be registered in compact mode'); + assert.ok( + reg.description.includes('ONE call'), + 'compact description must say "ONE call" — otherwise compact mode silently strips the contract (PDX-479 regression highway)' + ); + assert.ok(reg.description.includes('FULL steps'), 'compact description must mention the FULL steps[] tree'); + assert.ok( + reg.description.includes('AMENDING') || reg.description.includes('amend'), + 'compact description must mark step_edit as amendment-only' + ); + assert.ok( + !reg.description.includes('UUID guids and steps structure'), + 'old compact form (contract-free) must not be in use anymore' + ); + }); + }); }); // ── provar_testcase_generate ─────────────────────────────────────────────────── @@ -277,11 +421,16 @@ describe('provar_testcase_generate', () => { }); describe('writing to disk', () => { + // Each disk-write test uses a non-empty steps[] so the PDX-483 STEPS_REQUIRED + // guard (which rejects steps:[]+dry_run:false+output_path) does not fire. + // These tests assert *other* behaviour: file write, overwrite, mkdirp, path policy. 
+ const SMOKE_STEPS = [{ api_id: 'UiConnect', name: 'Connect', attributes: {} }]; + it('writes file when dry_run=false and output_path provided', () => { const outPath = path.join(tmpDir, 'Login.testcase'); const result = server.call('provar_testcase_generate', { test_case_name: 'Login', - steps: [], + steps: SMOKE_STEPS, output_path: outPath, dry_run: false, overwrite: false, @@ -310,7 +459,7 @@ describe('provar_testcase_generate', () => { const result = server.call('provar_testcase_generate', { test_case_name: 'Existing', - steps: [], + steps: SMOKE_STEPS, output_path: outPath, dry_run: false, overwrite: false, @@ -326,7 +475,7 @@ describe('provar_testcase_generate', () => { const result = server.call('provar_testcase_generate', { test_case_name: 'Existing', - steps: [], + steps: SMOKE_STEPS, output_path: outPath, dry_run: false, overwrite: true, @@ -341,7 +490,7 @@ describe('provar_testcase_generate', () => { const outPath = path.join(tmpDir, 'tests', 'suite', 'Login.testcase'); server.call('provar_testcase_generate', { test_case_name: 'Login', - steps: [], + steps: SMOKE_STEPS, output_path: outPath, dry_run: false, overwrite: false, @@ -351,14 +500,144 @@ describe('provar_testcase_generate', () => { }); }); + // ── PDX-483 runtime guard: reject empty steps[] on non-dry-run with output_path ── + // The PDX-479 regression class arose from agents calling generate with steps:[] + // intending to append later via step_edit. The passive contract (PDX-482) lives in + // the description; the active runtime guard rejects the exact shape that produces + // a contract-violating file on disk. The 6 edge cases below pin down which empty- + // steps shapes are allowed (dry-run preview, inspection-only) vs rejected (file write). 
+ describe('STEPS_REQUIRED runtime guard (PDX-483)', () => { + const SINGLE_STEP = [{ api_id: 'UiConnect', name: 'Connect', attributes: {} }]; + + it('allows steps:[] + dry_run:true + no output_path (skeleton inspection)', () => { + const result = server.call('provar_testcase_generate', { + test_case_name: 'Skeleton Inspect', + steps: [], + dry_run: true, + overwrite: false, + }); + assert.equal(isError(result), false, 'dry-run skeleton inspection must remain allowed'); + assert.equal(parseText(result)['written'], false); + }); + + it('allows steps:[] + dry_run:true + output_path provided (dry-run preview wins)', () => { + const outPath = path.join(tmpDir, 'DryRunWithPath.testcase'); + const result = server.call('provar_testcase_generate', { + test_case_name: 'DryRun With Path', + steps: [], + output_path: outPath, + dry_run: true, + overwrite: false, + }); + assert.equal(isError(result), false, 'dry-run wins over output_path — no file is written'); + assert.equal(fs.existsSync(outPath), false, 'file must not be written in dry_run mode'); + }); + + it('allows steps:[] + dry_run:false + no output_path (no persistence target)', () => { + const result = server.call('provar_testcase_generate', { + test_case_name: 'No Output Path', + steps: [], + dry_run: false, + overwrite: false, + }); + assert.equal(isError(result), false, 'no output_path means no file write — TODO-only XML is harmless'); + assert.equal(parseText(result)['written'], false); + }); + + it('REJECTS steps:[] + dry_run:false + output_path with STEPS_REQUIRED', () => { + const outPath = path.join(tmpDir, 'Empty.testcase'); + const result = server.call('provar_testcase_generate', { + test_case_name: 'Empty Build', + steps: [], + output_path: outPath, + dry_run: false, + overwrite: false, + }); + assert.equal(isError(result), true, 'multi-call construction pattern must be rejected'); + const body = parseText(result); + assert.equal(body['error_code'], 'STEPS_REQUIRED'); + assert.equal(body['retryable'], 
false); + const details = body['details'] as Record; + assert.ok(details, 'error must include details'); + const suggestion = details['suggestion']; + assert.ok(typeof suggestion === 'string', 'details.suggestion must be a string'); + assert.ok(suggestion.length > 0, 'details.suggestion must be non-empty'); + assert.ok( + suggestion.includes('FULL step tree'), + 'suggestion must instruct passing the FULL step tree in a single call' + ); + assert.ok( + suggestion.includes('dry_run=true'), + 'suggestion must mention the dry_run=true escape hatch for skeleton inspection' + ); + }); + + it('STEPS_REQUIRED rejection writes NO file (assertion: fs.existsSync === false)', () => { + const outPath = path.join(tmpDir, 'NeverWritten.testcase'); + server.call('provar_testcase_generate', { + test_case_name: 'Never Written', + steps: [], + output_path: outPath, + dry_run: false, + overwrite: false, + }); + assert.equal( + fs.existsSync(outPath), + false, + 'STEPS_REQUIRED rejection must run BEFORE fs.writeFileSync — no skeleton on disk' + ); + }); + + it('allows non-empty steps + dry_run:false + output_path (happy path — normal write)', () => { + const outPath = path.join(tmpDir, 'HappyPath.testcase'); + const result = server.call('provar_testcase_generate', { + test_case_name: 'Happy Path', + steps: SINGLE_STEP, + output_path: outPath, + dry_run: false, + overwrite: false, + }); + assert.equal(isError(result), false, 'normal write path must remain unchanged'); + assert.equal(parseText(result)['written'], true); + assert.equal(fs.existsSync(outPath), true, 'happy-path file must be written'); + }); + + // Path-policy ordering check: the guard must fire BEFORE assertPathAllowed + // so that a caller in the rejected shape gets STEPS_REQUIRED (the actionable + // root-cause error), not PATH_NOT_ALLOWED (which would mislead about the fix). 
+ it('STEPS_REQUIRED fires BEFORE path policy when both would reject', () => { + const strictServer = new MockMcpServer(); + registerTestCaseGenerate(strictServer as never, { allowedPaths: [tmpDir] }); + const result = strictServer.call('provar_testcase_generate', { + test_case_name: 'Outside And Empty', + steps: [], + // Path outside allowedPaths AND empty steps — STEPS_REQUIRED must win + // because its suggestion is the actionable one (path is moot if no steps). + output_path: path.join(os.tmpdir(), 'outside-and-empty.testcase'), + dry_run: false, + overwrite: false, + }); + assert.equal(isError(result), true); + assert.equal( + parseText(result)['error_code'], + 'STEPS_REQUIRED', + 'STEPS_REQUIRED must fire before assertPathAllowed — the empty-payload root cause is what the LLM needs to see' + ); + }); + }); + describe('path policy', () => { + // Uses a non-empty steps[] to bypass the PDX-483 STEPS_REQUIRED guard so + // the assertion targets the PATH_NOT_ALLOWED branch specifically. + const SMOKE_STEPS = [{ api_id: 'UiConnect', name: 'Connect', attributes: {} }]; + it('returns PATH_NOT_ALLOWED when output_path is outside allowedPaths', () => { const strictServer = new MockMcpServer(); registerTestCaseGenerate(strictServer as never, { allowedPaths: [tmpDir] }); const result = strictServer.call('provar_testcase_generate', { test_case_name: 'Evil', - steps: [], + steps: SMOKE_STEPS, output_path: path.join(os.tmpdir(), 'evil.testcase'), dry_run: false, overwrite: false, @@ -951,4 +1230,168 @@ describe('provar_testcase_generate', () => { assert.ok(!xml.includes('class="compound"'), 'Pure {VarName} must NOT use class="compound"'); }); }); + + // ── PDX-481 regression guard ───────────────────────────────────────────────── + // The 1.5.0 regression (PDX-479) happened when agents authored test cases + // step-by-step via repeated tool calls instead of constructing the full step + // tree in a single provar_testcase_generate call. 
This block proves that + // when the full tree IS passed in one call, the output is structurally clean: + // scenarios numbered consecutively, asserts emitted with consistent types, + // and testItemIds sequential. + + describe('multi-scenario single-call construction (PDX-481 regression guard)', () => { + it('emits consecutive testItemIds across a 3-scenario, multi-step payload', () => { + const result = server.call('provar_testcase_generate', { + test_case_name: 'AccountFlow', + steps: [ + // Scenario 1 — Create Account + { api_id: 'UiConnect', name: 'Salesforce Connect', attributes: {} }, + { + api_id: 'SetValues', + name: 'Set Account Test Data', + attributes: { AccountName: 'Acme', AccountPhone: '555-0100' }, + }, + { api_id: 'UiNavigate', name: 'Scenario 1: navigate to Account home', attributes: {} }, + { api_id: 'UiDoAction', name: 'Scenario 1: click New', attributes: {} }, + { + api_id: 'SetValues', + name: 'Scenario 1: fill form', + attributes: { Name: '{AccountName}', Phone: '{AccountPhone}' }, + }, + { api_id: 'UiDoAction', name: 'Scenario 1: click Save', attributes: {} }, + // Scenario 2 — Verify on list view (the scenario that went missing on 1.5.0) + { api_id: 'UiNavigate', name: 'Scenario 2: go to Account list', attributes: {} }, + { + api_id: 'AssertValues', + name: 'Scenario 2: assert Name on list', + attributes: { expectedValue: '{AccountName}', actualValue: 'Name', comparisonType: 'EqualTo' }, + }, + { + api_id: 'AssertValues', + name: 'Scenario 2: assert Phone on list', + attributes: { expectedValue: '{AccountPhone}', actualValue: 'Phone', comparisonType: 'EqualTo' }, + }, + // Scenario 3 — Open detail and assert all + { api_id: 'UiDoAction', name: 'Scenario 3: open Account detail', attributes: {} }, + { + api_id: 'AssertValues', + name: 'Scenario 3: assert Name on detail', + attributes: { expectedValue: '{AccountName}', actualValue: 'Name', comparisonType: 'EqualTo' }, + }, + { + api_id: 'AssertValues', + name: 'Scenario 3: assert Phone on 
// Guards against scenario steps vanishing during single-call generation:
// every step name sent in the payload must appear verbatim in the XML.
it('preserves every step name from the payload — no scenario marker is silently dropped', () => {
  // Each marker doubles as a step name, so the payload and the expectations
  // come from the same ordered list.
  const scenarioMarkers = [
    'Scenario 1: When create',
    'Scenario 1: Then verify',
    'Scenario 2: When edit',
    'Scenario 2: Then verify',
    'Scenario 3: When delete',
    'Scenario 3: Then absent',
  ];

  const result = server.call('provar_testcase_generate', {
    test_case_name: 'ScenarioMarkers',
    steps: scenarioMarkers.map((name) => ({ api_id: 'UiDoAction', name, attributes: {} })),
    dry_run: true,
    overwrite: false,
  });

  assert.equal(isError(result), false);
  const generatedXml = parseText(result)['xml_content'] as string;
  scenarioMarkers.forEach((marker) => {
    assert.ok(generatedXml.includes(marker), `scenario marker "${marker}" must be preserved verbatim`);
  });
});
// Single-call generation with a page-object (non-Salesforce) target_uri: the
// output must contain a UiWithScreen wrapper, and the nested steps must carry
// testItemIds 3 and 4.
it('wraps a non-SF target_uri in UiWithScreen with nested steps — full tree in one call', () => {
  const result = server.call('provar_testcase_generate', {
    test_case_name: 'PageObjectNested',
    target_uri: 'ui:pageobject:target?pageId=pageobjects.AccountPage',
    steps: [
      { api_id: 'UiDoAction', name: 'Click new', attributes: {} },
      {
        api_id: 'AssertValues',
        name: 'Assert created',
        attributes: { expectedValue: '{x}', actualValue: 'y', comparisonType: 'EqualTo' },
      },
    ],
    dry_run: true,
    overwrite: false,
  });

  assert.equal(isError(result), false);
  const xml = parseText(result)['xml_content'] as string;
  assert.ok(xml.includes('UiWithScreen'), 'non-SF target_uri must wrap in UiWithScreen');

  // The next two checks previously searched for the empty string, which
  // `includes` always finds, so they could never fail.
  // NOTE(review): the tag names below are reconstructed from the assertion
  // messages (a clauses wrapper and a "substeps" clause with testItemId="2");
  // confirm against the builder's real output. The patterns tolerate extra
  // attributes and either attribute order.
  assert.ok(/<clauses[\s>]/.test(xml), 'wrapper must contain <clauses>');
  assert.ok(
    /<clause\b(?=[^>]*\sname="substeps")(?=[^>]*\stestItemId="2")[^>]*>/.test(xml),
    'substeps clause must have testItemId="2"'
  );

  // Inner steps start at testItemId=3 per builder convention.
  assert.ok(xml.includes('testItemId="3"'), 'first nested step must have testItemId="3"');
  assert.ok(xml.includes('testItemId="4"'), 'second nested step must have testItemId="4"');
});
/**
 * Stores `handler` under `name` and, when the config carries a string
 * `description`, records a registration entry for later assertions.
 *
 * @param name - tool name; used as the handler key and the registration name
 * @param config - tool config; only its `description` and `title` are read
 * @param handler - callback stored in the handlers map
 */
public registerTool(name: string, config: unknown, handler: ToolHandler): void {
  this.handlers.set(name, handler);

  // `Record` needs both type arguments to compile (TS2314); `unknown` values
  // force the typeof checks below.
  const { description, title } = config as Record<string, unknown>;

  // Configs without a string description are not recorded at all.
  if (typeof description !== 'string') return;

  // A missing or non-string title is stored as '' so `title` is always a string.
  this.registrations.push({
    name,
    description,
    title: typeof title === 'string' ? title : '',
  });
}
// Description-level contract: the tool text must frame step_edit as an
// amendment tool and redirect construction to provar_testcase_generate.

it('description self-identifies as AMENDMENT-ONLY at the top', () => {
  const registration = server.registrations.find((r) => r.name === 'provar_testcase_step_edit');
  assert.ok(registration, 'tool should be registered');
  const { description } = registration;
  // NOTE(review): this checks presence only, not position, although the title
  // and message say "at the top" / "lead with".
  assert.ok(
    ['AMENDMENT-ONLY', 'AMENDING'].some((framing) => description.includes(framing)),
    'description must lead with AMENDMENT-ONLY / AMENDING framing so the LLM reads it before mechanics'
  );
});

it('description explicitly rejects construction-from-scratch usage', () => {
  const registration = server.registrations.find((r) => r.name === 'provar_testcase_step_edit');
  assert.ok(registration, 'tool should be registered');
  const { description } = registration;
  assert.ok(
    ['NOT for constructing', 'not for constructing'].some((phrase) => description.includes(phrase)),
    'description must explicitly say it is NOT for constructing a test case from scratch'
  );
  assert.ok(
    description.includes('provar_testcase_generate'),
    'description must point the agent at provar_testcase_generate for new test case construction'
  );
});

it('description warns about the structural defects from multi-call construction', () => {
  const registration = server.registrations.find((r) => r.name === 'provar_testcase_step_edit');
  assert.ok(registration, 'tool should be registered');
  const { description } = registration;
  // All three defects are required, each with its own assertion, so removing
  // any one of them from the description fails this test.
  const requiredDefects: Array<[string, string]> = [
    ['dropped scenarios', 'description must call out "dropped scenarios" (the symptom that first surfaced PDX-479)'],
    ['flat asserts', 'description must call out "flat asserts" (the second observable defect)'],
    ['inconsistent step types', 'description must call out "inconsistent step types" (the third observable defect)'],
  ];
  for (const [phrase, message] of requiredDefects) {
    assert.ok(description.includes(phrase), message);
  }
});

// Title-level contract: the same amendment-only signal must also be present
// in the `title` field, within a 50-character limit.

it('title carries the amendment-only contract (PDX-484)', () => {
  const registration = server.registrations.find((r) => r.name === 'provar_testcase_step_edit');
  assert.ok(registration, 'tool should be registered');
  const { title } = registration;
  const titleSignals: Array<[RegExp, string]> = [
    [/amend/i, 'title must contain "Amend" or "amendment" so the contract is visible in tool-picker chips'],
    [
      /exist/i,
      'title must signal "existing test case only" so an agent reading only the chip does not call this for construction',
    ],
  ];
  for (const [pattern, message] of titleSignals) {
    assert.ok(pattern.test(title), message);
  }
});

it('title fits the cross-client chip-render comfort threshold (≤50 chars, PDX-484)', () => {
  const registration = server.registrations.find((r) => r.name === 'provar_testcase_step_edit');
  assert.ok(registration, 'tool should be registered');
  const titleLength = registration.title.length;
  assert.ok(titleLength <= 50, `title length ${titleLength} exceeds 50 chars — Cursor and other clients may truncate`);
});
// Verifies which top-level keys the validate handler returns for each
// `detail` level.
describe('PDX-470 — detail level', () => {
  it('standard response includes is_valid, issues, and run_id', async () => {
    const res = (await capServer.capturedHandler!({
      content: VALID_TC,
      detail: 'standard',
    })) as { content: Array<{ text: string }> };
    // `Record` needs both type arguments to compile (TS2314).
    const result = JSON.parse(res.content[0].text) as Record<string, unknown>;
    for (const key of ['is_valid', 'issues', 'run_id']) {
      assert.ok(key in result, `standard should include ${key}`);
    }
  });

  it('summary response includes only key fields, not issues', async () => {
    const res = (await capServer.capturedHandler!({
      content: VALID_TC,
      detail: 'summary',
    })) as { content: Array<{ text: string }> };
    const result = JSON.parse(res.content[0].text) as Record<string, unknown>;
    for (const key of ['is_valid', 'quality_score', 'completeness_score', 'recommended_next_action']) {
      assert.ok(key in result, `summary should include ${key}`);
    }
    assert.ok(!('issues' in result), 'summary should NOT include issues');
  });
});
// Covers the run_id / baseline_run_id contract of the validate handler:
// every response carries a run_id, an unknown baseline is an error, and a
// known baseline switches the response to diff fields.
describe('PDX-471 — baseline_run_id diff mode', () => {
  it('run_id is present in every response', async () => {
    const res = (await capServer.capturedHandler!({ content: VALID_TC })) as {
      content: Array<{ text: string }>;
    };
    // `Record` needs both type arguments to compile (TS2314).
    const result = JSON.parse(res.content[0].text) as Record<string, unknown>;
    assert.ok(typeof result['run_id'] === 'string' && result['run_id'].length > 0);
  });

  it('returns BASELINE_NOT_FOUND for an unknown baseline_run_id', async () => {
    const res = (await capServer.capturedHandler!({
      content: VALID_TC,
      baseline_run_id: 'nonexistent-run-id-xyz',
    })) as { isError?: boolean; content: Array<{ text: string }> };
    assert.equal(res.isError, true);
    const body = JSON.parse(res.content[0].text) as Record<string, unknown>;
    assert.equal(body['error_code'], 'BASELINE_NOT_FOUND');
  });

  it('diff mode returns added/resolved/unchanged_count when baseline exists', async () => {
    // First run: its run_id becomes the baseline for the second call.
    const first = (await capServer.capturedHandler!({ content: VALID_TC })) as {
      content: Array<{ text: string }>;
    };
    const firstBody = JSON.parse(first.content[0].text) as Record<string, unknown>;
    const runId = firstBody['run_id'] as string;

    // Second run against that baseline must succeed and carry the diff keys.
    const second = (await capServer.capturedHandler!({
      content: VALID_TC,
      baseline_run_id: runId,
    })) as { isError?: boolean; content: Array<{ text: string }> };
    assert.ok(!second.isError);
    const diffBody = JSON.parse(second.content[0].text) as Record<string, unknown>;
    for (const key of ['added', 'resolved', 'unchanged_count']) {
      assert.ok(key in diffBody, `diff should include ${key}`);
    }
  });
});
// Checks the completeness score and the next-action recommendation returned
// by provar_testplan_validate, including that 'stop' is withheld while any
// violation remains.
describe('PDX-473 — completeness_score and recommended_next_action', () => {
  const structurallyValidCase = { name: 'Valid.testcase', xml_content: makeXml(G.tc1, G.s1, '1') };
  const structurallyValidSuite = { name: 'ValidSuite', test_cases: [structurallyValidCase] };

  it('completeness_score is present in every response', () => {
    const body = parseText(
      server.call('provar_testplan_validate', {
        plan_name: 'ScorePlan',
        test_suites: [SUITE_A],
      })
    );
    assert.ok('completeness_score' in body, 'completeness_score should be present');
    assert.ok(typeof body['completeness_score'] === 'number');
  });

  it('completeness_score is 0 when plan has no test cases', () => {
    const body = parseText(server.call('provar_testplan_validate', { plan_name: 'EmptyPlan' }));
    assert.equal(body['completeness_score'], 0);
  });

  it('recommended_next_action is a valid string value', () => {
    const body = parseText(
      server.call('provar_testplan_validate', {
        plan_name: 'ActionPlan',
        test_suites: [SUITE_A],
      })
    );
    assert.ok('recommended_next_action' in body);
    const allowedActions = new Set(['stop', 'fix_and_revalidate', 'inspect_failures']);
    assert.ok(allowedActions.has(body['recommended_next_action'] as string));
  });

  it('recommended_next_action is NOT stop when test cases are structurally valid but BP violations remain (B1)', () => {
    // The test case is structurally valid (score 100) yet still has
    // best-practice violations, and fullMeta() removes plan-level metadata
    // violations. The nested violations alone must keep the action off 'stop'.
    const body = parseText(
      server.call('provar_testplan_validate', {
        plan_name: 'AllValidPlan',
        test_suites: [structurallyValidSuite],
        metadata: fullMeta(),
      })
    );
    assert.equal(body['completeness_score'], 100);
    const action = body['recommended_next_action'];
    assert.notEqual(action, 'stop', `Expected NOT stop while BP violations remain, got: ${String(action)}`);
  });

  it('recommended_next_action is NOT stop when score=100 but plan metadata violations remain (B1)', () => {
    // Same structurally valid suite, but metadata is omitted so the
    // PLAN-META-* rules fire at plan level. A perfect completeness score must
    // not be enough for 'stop' while those remain.
    const body = parseText(
      server.call('provar_testplan_validate', {
        plan_name: 'MissingMetaPlan',
        test_suites: [structurallyValidSuite],
        // metadata deliberately absent
      })
    );
    assert.equal(body['completeness_score'], 100);
    const action = body['recommended_next_action'];
    assert.notEqual(action, 'stop', `Expected NOT stop while plan metadata violations remain, got: ${String(action)}`);
  });

  it('recommended_next_action is inspect_failures when plan has failures (no baseline)', () => {
    const body = parseText(
      server.call('provar_testplan_validate', {
        plan_name: 'FailingPlan',
        test_suites: [SUITE_A],
      })
    );
    assert.ok((body['completeness_score'] as number) < 100);
    assert.equal(body['recommended_next_action'], 'inspect_failures');
  });
});
// Verifies which top-level keys provar_testsuite_validate returns for each
// `detail` level.
describe('PDX-470 — detail level', () => {
  it('standard response includes violations, test_cases, and run_id', () => {
    const body = parseText(
      server.call('provar_testsuite_validate', {
        suite_name: 'DetailSuite',
        test_cases: [TC_LOGIN],
        detail: 'standard',
      })
    );
    for (const key of ['violations', 'test_cases', 'run_id']) {
      assert.ok(key in body, `standard should include ${key}`);
    }
  });

  it('summary response includes only key metrics', () => {
    const body = parseText(
      server.call('provar_testsuite_validate', {
        suite_name: 'SummarySuite',
        test_cases: [TC_LOGIN],
        detail: 'summary',
      })
    );
    for (const key of ['quality_score', 'completeness_score', 'recommended_next_action']) {
      assert.ok(key in body, `summary should include ${key}`);
    }
    for (const key of ['violations', 'test_cases']) {
      assert.ok(!(key in body), `summary should NOT include ${key}`);
    }
  });

  it('full response includes all fields', () => {
    const body = parseText(
      server.call('provar_testsuite_validate', {
        suite_name: 'FullSuite',
        test_cases: [TC_LOGIN],
        detail: 'full',
      })
    );
    for (const key of ['violations', 'test_cases']) {
      assert.ok(key in body, `full should include ${key}`);
    }
  });
});
// Covers the run_id / baseline_run_id contract of provar_testsuite_validate.
describe('PDX-471 — baseline_run_id diff mode', () => {
  it('run_id is present in every standard response', () => {
    const body = parseText(
      server.call('provar_testsuite_validate', {
        suite_name: 'RunIdSuite',
        test_cases: [TC_LOGIN],
      })
    );
    assert.ok(typeof body['run_id'] === 'string' && body['run_id'].length > 0);
  });

  it('returns BASELINE_NOT_FOUND for an unknown baseline_run_id', () => {
    const result = server.call('provar_testsuite_validate', {
      suite_name: 'DiffSuite',
      test_cases: [TC_LOGIN],
      baseline_run_id: 'nonexistent-run-id-xyz',
    });
    assert.equal(isError(result), true);
    assert.equal(parseText(result)['error_code'], 'BASELINE_NOT_FOUND');
  });

  it('diff mode returns added/resolved/unchanged_count when baseline exists', () => {
    const suiteArgs = { suite_name: 'BaselineSuite', test_cases: [TC_LOGIN] };

    // The first run only exists to produce a run_id to diff against.
    const baselineRunId = parseText(server.call('provar_testsuite_validate', suiteArgs))['run_id'] as string;

    // Re-running the same suite against that baseline must return the diff keys.
    const second = server.call('provar_testsuite_validate', {
      ...suiteArgs,
      baseline_run_id: baselineRunId,
    });
    assert.equal(isError(second), false);
    const diffBody = parseText(second);
    for (const key of ['added', 'resolved', 'unchanged_count', 'run_id']) {
      assert.ok(key in diffBody, `diff should have ${key}`);
    }
  });
});
/** Builds a tool callback that ignores its arguments and always returns `response`. */
function makeHandler(response: ToolResult): AnyToolCallback {
  return () => response;
}

/** Canned successful tool result shared by the tests below. */
const okResponse: ToolResult = {
  content: [{ type: 'text', text: '{"ok":true}' }],
  structuredContent: { ok: true },
};

/** Canned error tool result (`isError: true`) shared by the tests below. */
const errResponse: ToolResult = {
  isError: true,
  content: [{ type: 'text', text: '{"error":"oops"}' }],
  structuredContent: { error: 'oops' },
};

/**
 * Runs `fn` with PROVAR_MCP_EMIT_TOKEN_META set to 'true' or 'false', then
 * puts the variable back to its prior state, even when `fn` throws.
 *
 * `fn` is invoked synchronously; the variable is restored as soon as it returns.
 *
 * @param enabled - whether the flag is set to 'true' (otherwise 'false')
 * @param fn - callback executed while the flag is overridden
 */
function withMeta(enabled: boolean, fn: () => void): void {
  const envKey = 'PROVAR_MCP_EMIT_TOKEN_META';
  const saved = process.env[envKey];
  process.env[envKey] = String(enabled);
  try {
    fn();
  } finally {
    // An originally-absent variable must end up absent again, not set to ''.
    if (saved !== undefined) {
      process.env[envKey] = saved;
    } else {
      delete process.env[envKey];
    }
  }
}
// Budget behavior for callers without a sessionId, and the shape of the
// TOOL_BUDGET_EXCEEDED payload.

it('shares a single anon bucket across calls when sessionId is absent', async () => {
  const state = createDepthGuardState();
  const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, 1);
  // Stdio transports (Claude Desktop, Cursor) don't pass a sessionId — all such
  // calls must share one bucket so the budget actually limits runaway tool use.
  await wrapped({}, {});
  const blocked = await wrapped({}, {});
  assert.strictEqual(blocked.isError, true);
  // `Record` needs both type arguments to compile (TS2314).
  const body = JSON.parse(blocked.content[0].text) as Record<string, unknown>;
  assert.strictEqual(body['error'], 'TOOL_BUDGET_EXCEEDED');
});

it('keeps named sessions independent from the anon bucket', async () => {
  const state = createDepthGuardState();
  const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, 1);
  await wrapped({}, {}); // anon bucket uses its 1 call
  const namedResult = await wrapped({}, { sessionId: 'sess-named' });
  assert.strictEqual(namedResult.isError, undefined);
});

it('includes a non-empty suggestion in TOOL_BUDGET_EXCEEDED', async () => {
  const state = createDepthGuardState();
  // A limit of 0 makes the very first call exceed the budget.
  const wrapped = wrapWithDepthGuard('tool', makeHandler(okResponse), state, 0);
  const result = await wrapped({}, { sessionId: 'sess-hint' });
  const body = JSON.parse(result.content[0].text) as Record<string, unknown>;
  assert.ok(typeof body['suggestion'] === 'string' && body['suggestion'].length > 10);
});
// attachMeta: adds a `_meta` entry under structuredContent only when
// PROVAR_MCP_EMIT_TOKEN_META is 'true'; otherwise the same object is returned.
describe('attachMeta', () => {
  /**
   * Reads `structuredContent._meta` from a tool result.
   * `Record` needs both type arguments to compile (TS2314); `unknown` values
   * keep the per-field checks below explicit.
   */
  const metaOf = (result: ToolResult): Record<string, unknown> =>
    (result.structuredContent as Record<string, unknown>)['_meta'] as Record<string, unknown>;

  it('attaches _meta when PROVAR_MCP_EMIT_TOKEN_META=true', () => {
    withMeta(true, () => {
      const meta = metaOf(attachMeta(okResponse, 'my_tool', 'standard'));
      assert.ok(meta, '_meta should be present');
      assert.strictEqual(meta['tool'], 'my_tool');
      assert.strictEqual(meta['detailLevel'], 'standard');
      assert.ok(typeof meta['estimatedTokens'] === 'number' && meta['estimatedTokens'] > 0);
    });
  });

  it('returns response unchanged when PROVAR_MCP_EMIT_TOKEN_META is not "true"', () => {
    withMeta(false, () => {
      const result = attachMeta(okResponse, 'my_tool', 'standard');
      assert.strictEqual(result, okResponse);
    });
  });

  it('returns response unchanged when env var is absent', () => {
    // withMeta always sets the variable, so absence is arranged by hand here.
    const prev = process.env['PROVAR_MCP_EMIT_TOKEN_META'];
    delete process.env['PROVAR_MCP_EMIT_TOKEN_META'];
    try {
      const result = attachMeta(okResponse, 'my_tool', 'standard');
      assert.strictEqual(result, okResponse);
    } finally {
      if (prev !== undefined) process.env['PROVAR_MCP_EMIT_TOKEN_META'] = prev;
    }
  });

  it('attaches _meta on error responses', () => {
    withMeta(true, () => {
      const meta = metaOf(attachMeta(errResponse, 'my_tool', 'full'));
      assert.ok(meta);
      assert.strictEqual(meta['detailLevel'], 'full');
    });
  });

  it('includes sessionTotalEstimatedTokens when provided', () => {
    withMeta(true, () => {
      const meta = metaOf(attachMeta(okResponse, 'my_tool', 'standard', 999));
      assert.strictEqual(meta['sessionTotalEstimatedTokens'], 999);
    });
  });

  it('does not include sessionTotalEstimatedTokens when not provided', () => {
    withMeta(true, () => {
      const meta = metaOf(attachMeta(okResponse, 'my_tool', 'standard'));
      assert.strictEqual('sessionTotalEstimatedTokens' in meta, false);
    });
  });

  it('does not modify content[0].text', () => {
    withMeta(true, () => {
      const result = attachMeta(okResponse, 'my_tool', 'standard');
      assert.strictEqual(result.content[0].text, okResponse.content[0].text);
    });
  });

  it('estimated_tokens is within ±50% of actual JSON length / 4', () => {
    withMeta(true, () => {
      const meta = metaOf(attachMeta(okResponse, 'my_tool', 'standard'));
      const estimate = meta['estimatedTokens'] as number;
      // Reference value: one token per four characters of the serialized response.
      const actual = Math.ceil(JSON.stringify(okResponse).length / 4);
      assert.ok(
        estimate >= actual * 0.5 && estimate <= actual * 1.5,
        `estimate ${estimate} should be within ±50% of ${actual}`
      );
    });
  });
});
Math.ceil(JSON.stringify(obj).length / 4); + assert.strictEqual(estimateTokens(obj), expected); + }); +}); + +// --------------------------------------------------------------------------- +// Integration: wrapWithDepthGuard + attachMeta +// --------------------------------------------------------------------------- + +describe('integration: wrapWithDepthGuard + attachMeta', () => { + beforeEach(() => { + process.env['PROVAR_MCP_EMIT_TOKEN_META'] = 'true'; + }); + afterEach(() => { + delete process.env['PROVAR_MCP_EMIT_TOKEN_META']; + }); + + it('attaches _meta on successful tool call', async () => { + const state = createDepthGuardState(); + const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 50); + const result = await wrapped({}, { sessionId: 'int-1' }); + const meta = (result.structuredContent as Record)['_meta'] as Record; + assert.ok(meta); + assert.strictEqual(meta['tool'], 'my_tool'); + }); + + it('attaches _meta on TOOL_BUDGET_EXCEEDED error', async () => { + const state = createDepthGuardState(); + const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 0); + const result = await wrapped({}, { sessionId: 'int-err' }); + assert.strictEqual(result.isError, true); + const meta = (result.structuredContent as Record)['_meta'] as Record; + assert.ok(meta); + assert.ok('sessionTotalEstimatedTokens' in meta); + }); + + it('uses detail arg from args when present', async () => { + const state = createDepthGuardState(); + const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 50); + const result = await wrapped({ detail: 'summary' }, { sessionId: 'int-detail' }); + const meta = (result.structuredContent as Record)['_meta'] as Record; + assert.strictEqual(meta['detailLevel'], 'summary'); + }); + + it('defaults detail_level to "standard" when detail arg is absent', async () => { + const state = createDepthGuardState(); + const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 
50); + const result = await wrapped({}, { sessionId: 'int-nodetail' }); + const meta = (result.structuredContent as Record)['_meta'] as Record; + assert.strictEqual(meta['detailLevel'], 'standard'); + }); + + it('preserves existing structuredContent keys alongside _meta', async () => { + const state = createDepthGuardState(); + const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 50); + const result = await wrapped({}, { sessionId: 'int-preserve' }); + const sc = result.structuredContent as Record; + assert.strictEqual(sc['ok'], true); + assert.ok(sc['_meta']); + }); + + it('does not attach _meta when env var is disabled', async () => { + delete process.env['PROVAR_MCP_EMIT_TOKEN_META']; + const state = createDepthGuardState(); + const wrapped = wrapWithDepthGuard('my_tool', makeHandler(okResponse), state, 50); + const result = await wrapped({}, { sessionId: 'int-disabled' }); + const sc = result.structuredContent as Record; + assert.strictEqual('_meta' in sc, false); + }); + + it('propagates handler errors', async () => { + const state = createDepthGuardState(); + const throwingHandler: AnyToolCallback = () => { + throw new Error('handler blew up'); + }; + const wrapped = wrapWithDepthGuard('my_tool', throwingHandler, state, 50); + await assert.rejects(async () => wrapped({}, { sessionId: 'int-throw' }), /handler blew up/); + }); +}); diff --git a/test/unit/mcp/validationDiff.test.ts b/test/unit/mcp/validationDiff.test.ts new file mode 100644 index 00000000..8560147f --- /dev/null +++ b/test/unit/mcp/validationDiff.test.ts @@ -0,0 +1,231 @@ +/* eslint-disable camelcase */ +import { strict as assert } from 'node:assert'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { describe, it, beforeEach, afterEach } from 'mocha'; +import { + generateRunId, + saveRun, + hasAnyRun, + loadBaselineViolations, + computeDiff, + computeContextHash, + resolveValidationDir, +} from 
'../../../src/mcp/utils/validationDiff.js'; + +const V1 = { rule_id: 'RULE-001', applies_to: 'TestSuite', message: 'Suite is empty' }; +const V2 = { rule_id: 'RULE-002', applies_to: 'TestPlan', message: 'Plan has no suites' }; +const V3 = { rule_id: 'RULE-003', applies_to: 'Project', message: 'No test plans' }; + +let tmpDir: string; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'valdiff-test-')); +}); + +afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +describe('generateRunId', () => { + it('produces a timestamp-hash string', () => { + const id = generateRunId('/some/project/path'); + assert.match(id, /^\d+-[0-9a-f]{8}-[0-9a-z]{4}$/); + }); + + it('produces different IDs for different contexts', () => { + const id1 = generateRunId('/path/a'); + const id2 = generateRunId('/path/b'); + // hash portion differs + assert.notEqual(id1.split('-')[1], id2.split('-')[1]); + }); +}); + +describe('hasAnyRun', () => { + it('returns false when no index file exists', () => { + assert.equal(hasAnyRun(tmpDir), false); + }); + + it('returns true after a run is saved', () => { + saveRun(tmpDir, generateRunId('ctx'), [V1]); + assert.equal(hasAnyRun(tmpDir), true); + }); +}); + +describe('saveRun / loadBaselineViolations', () => { + it('saves and retrieves violations by run_id', () => { + const runId = generateRunId('ctx'); + saveRun(tmpDir, runId, [V1, V2]); + const loaded = loadBaselineViolations(tmpDir, runId); + assert.deepEqual(loaded, [V1, V2]); + }); + + it('returns null for an unknown run_id', () => { + const result = loadBaselineViolations(tmpDir, 'nonexistent-run-id'); + assert.equal(result, null); + }); + + it('caps index at 20 entries and evicts the oldest', () => { + const ids: string[] = []; + for (let i = 0; i < 22; i++) { + const id = `${Date.now() + i}-abc${i.toString().padStart(4, '0')}`; + ids.push(id); + saveRun(tmpDir, id, [V1]); + } + // First two should be evicted + 
assert.equal(loadBaselineViolations(tmpDir, ids[0]), null); + assert.equal(loadBaselineViolations(tmpDir, ids[1]), null); + // Last 20 should still be present + for (let i = 2; i < 22; i++) { + assert.notEqual(loadBaselineViolations(tmpDir, ids[i]), null, `Expected run ${i} to be present`); + } + }); +}); + +describe('computeDiff', () => { + it('returns empty diff when violations are identical', () => { + const diff = computeDiff([V1, V2], [V1, V2]); + assert.deepEqual(diff.added, []); + assert.deepEqual(diff.resolved, []); + assert.equal(diff.unchanged_count, 2); + }); + + it('detects added violations', () => { + const diff = computeDiff([V1], [V1, V2]); + assert.equal(diff.added.length, 1); + assert.equal(diff.added[0]['rule_id'], 'RULE-002'); + assert.deepEqual(diff.resolved, []); + assert.equal(diff.unchanged_count, 1); + }); + + it('detects resolved violations', () => { + const diff = computeDiff([V1, V2], [V2]); + assert.deepEqual(diff.added, []); + assert.equal(diff.resolved.length, 1); + assert.equal(diff.resolved[0]['rule_id'], 'RULE-001'); + assert.equal(diff.unchanged_count, 1); + }); + + it('detects added and resolved in the same diff', () => { + const diff = computeDiff([V1, V2], [V2, V3]); + assert.equal(diff.added.length, 1); + assert.equal(diff.added[0]['rule_id'], 'RULE-003'); + assert.equal(diff.resolved.length, 1); + assert.equal(diff.resolved[0]['rule_id'], 'RULE-001'); + assert.equal(diff.unchanged_count, 1); + }); + + it('handles empty baseline (all current violations are added)', () => { + const diff = computeDiff([], [V1, V2]); + assert.equal(diff.added.length, 2); + assert.deepEqual(diff.resolved, []); + assert.equal(diff.unchanged_count, 0); + }); + + it('handles empty current (all baseline violations are resolved)', () => { + const diff = computeDiff([V1, V2], []); + assert.deepEqual(diff.added, []); + assert.equal(diff.resolved.length, 2); + assert.equal(diff.unchanged_count, 0); + }); + + it('multiset: duplicate violations are treated 
as distinct entries', () => { + // V1 appears twice in baseline, three times in current → 1 added, 2 unchanged + const diff = computeDiff([V1, V1], [V1, V1, V1]); + assert.equal(diff.added.length, 1, 'one extra occurrence added'); + assert.equal(diff.resolved.length, 0); + assert.equal(diff.unchanged_count, 2); + }); + + it('multiset: reducing duplicate count registers as resolved', () => { + // V1 appears three times in baseline, once in current → 2 resolved, 1 unchanged + const diff = computeDiff([V1, V1, V1], [V1]); + assert.equal(diff.added.length, 0); + assert.equal(diff.resolved.length, 2, 'two occurrences resolved'); + assert.equal(diff.unchanged_count, 1); + }); +}); + +// ── H3: cross-context scoping ───────────────────────────────────────────────── + +describe('computeContextHash', () => { + it('is deterministic for the same tool+context', () => { + assert.equal(computeContextHash('tc', '/a/b/c.testcase'), computeContextHash('tc', '/a/b/c.testcase')); + }); + + it('differs for different tools with the same context', () => { + assert.notEqual(computeContextHash('tc', '/path'), computeContextHash('suite', '/path')); + }); + + it('differs for different contexts under the same tool', () => { + assert.notEqual(computeContextHash('tc', '/a'), computeContextHash('tc', '/b')); + }); +}); + +describe('loadBaselineViolations — context scoping (H3)', () => { + it('returns null when expectedContextHash does not match the saved record', () => { + const ctxA = computeContextHash('tc', '/project/a/x.testcase'); + const ctxB = computeContextHash('tc', '/project/b/y.testcase'); + const runId = generateRunId('/project/a/x.testcase'); + saveRun(tmpDir, runId, [V1], ctxA); + + // Same store, same run_id, different context → should be rejected + assert.equal(loadBaselineViolations(tmpDir, runId, ctxB), null); + }); + + it('returns the violations when expectedContextHash matches', () => { + const ctx = computeContextHash('tc', '/project/a/x.testcase'); + const runId = 
generateRunId('/project/a/x.testcase'); + saveRun(tmpDir, runId, [V1, V2], ctx); + + const loaded = loadBaselineViolations(tmpDir, runId, ctx); + assert.deepEqual(loaded, [V1, V2]); + }); + + it('treats records written without context_hash as a mismatch when one is expected', () => { + // Simulate a record persisted by an older version (no context_hash) + const runId = generateRunId('/legacy/path'); + saveRun(tmpDir, runId, [V1]); // omit contextHash + const ctx = computeContextHash('tc', '/legacy/path'); + assert.equal(loadBaselineViolations(tmpDir, runId, ctx), null); + }); + + it('still loads when no expectedContextHash is provided (back-compat)', () => { + const runId = generateRunId('/path'); + saveRun(tmpDir, runId, [V1]); // no context hash + assert.deepEqual(loadBaselineViolations(tmpDir, runId), [V1]); + }); +}); + +describe('resolveValidationDir', () => { + let saved: string | undefined; + + beforeEach(() => { + saved = process.env['PROVAR_MCP_VALIDATION_DIR']; + delete process.env['PROVAR_MCP_VALIDATION_DIR']; + }); + + afterEach(() => { + if (saved !== undefined) process.env['PROVAR_MCP_VALIDATION_DIR'] = saved; + else delete process.env['PROVAR_MCP_VALIDATION_DIR']; + }); + + it('defaults to ~/.provardx/validation/ when env override is unset', () => { + const dir = resolveValidationDir('testcase'); + const expected = path.join(os.homedir(), '.provardx', 'validation', 'testcase'); + assert.equal(dir, expected); + }); + + it('honors PROVAR_MCP_VALIDATION_DIR when set', () => { + process.env['PROVAR_MCP_VALIDATION_DIR'] = path.join(tmpDir, 'custom-root'); + const dir = resolveValidationDir('testsuite'); + assert.equal(dir, path.join(tmpDir, 'custom-root', 'testsuite')); + }); + + it('trims whitespace and falls back to default when env is whitespace-only', () => { + process.env['PROVAR_MCP_VALIDATION_DIR'] = ' '; + const dir = resolveValidationDir('testcase'); + assert.equal(dir, path.join(os.homedir(), '.provardx', 'validation', 'testcase')); + }); +}); 
diff --git a/test/unit/mcp/validationScore.test.ts b/test/unit/mcp/validationScore.test.ts
new file mode 100644
index 00000000..b7074c95
--- /dev/null
+++ b/test/unit/mcp/validationScore.test.ts
@@ -0,0 +1,100 @@
+/* eslint-disable camelcase */
+import { strict as assert } from 'node:assert';
+import { describe, it } from 'mocha';
+import { calcCompletenessScore, calcNextAction } from '../../../src/mcp/utils/validationScore.js';
+
+// Table-driven specs: every row keeps an original test title together with the
+// exact argument lists and expected results asserted under that title.
+
+// [first argument, second argument, expected score]. The titles read the two
+// arguments as (passing count, total count) — confirm against validationScore.ts.
+type ScoreCheck = [number, number, number];
+
+// Argument list for one calcNextAction call. The titles suggest (score, baseline
+// exists, remaining violation count) — confirm against validationScore.ts.
+type ActionArgs = [number, boolean, number?];
+
+interface ScoreSpec {
+  title: string;
+  checks: ScoreCheck[];
+}
+
+interface ActionSpec {
+  title: string;
+  expected: string;
+  calls: ActionArgs[];
+}
+
+const scoreSpecs: ScoreSpec[] = [
+  { title: 'returns 100 when all tests pass', checks: [[10, 10, 100]] },
+  { title: 'returns 0 when no tests pass', checks: [[0, 10, 0]] },
+  { title: 'returns 0 when total is 0 (no tests)', checks: [[0, 0, 0]] },
+  {
+    title: 'rounds to nearest integer',
+    checks: [
+      [1, 3, 33], // 1/3 ≈ 33.33 → 33
+      [2, 3, 67], // 2/3 ≈ 66.67 → 67
+    ],
+  },
+  { title: 'returns 50 for half passing', checks: [[5, 10, 50]] },
+];
+
+const actionSpecs: ActionSpec[] = [
+  {
+    title: 'returns "stop" when score is 100 and no violations remain',
+    expected: 'stop',
+    calls: [
+      [100, true],
+      [100, false],
+      [100, true, 0],
+    ],
+  },
+  {
+    title: 'returns "inspect_failures" when score < 100 and no baseline (first run)',
+    expected: 'inspect_failures',
+    calls: [
+      [0, false],
+      [50, false],
+      [99, false],
+    ],
+  },
+  {
+    title: 'returns "fix_and_revalidate" when score < 100 and baseline exists',
+    expected: 'fix_and_revalidate',
+    calls: [
+      [0, true],
+      [50, true],
+      [99, true],
+    ],
+  },
+  {
+    title: 'returns "fix_and_revalidate" when score is 100 but quality violations remain and baseline exists',
+    expected: 'fix_and_revalidate',
+    calls: [[100, true, 3]],
+  },
+  {
+    title: 'returns "inspect_failures" when score is 100 but violations remain on first run',
+    expected: 'inspect_failures',
+    calls: [[100, false, 2]],
+  },
+];
+
+describe('calcCompletenessScore', () => {
+  for (const { title, checks } of scoreSpecs) {
+    it(title, () => {
+      for (const [passing, total, expectedScore] of checks) {
+        assert.equal(calcCompletenessScore(passing, total), expectedScore);
+      }
+    });
+  }
+});
+
+describe('calcNextAction', () => {
+  for (const { title, expected, calls } of actionSpecs) {
+    it(title, () => {
+      for (const args of calls) {
+        assert.equal(calcNextAction(...args), expected);
+      }
+    });
+  }
+});