From a9dc26a9cda1cfd4a95b36ff727c113f32d219eb Mon Sep 17 00:00:00 2001
From: PatrickSys <rossellocolompatrick@gmail.com>
Date: Fri, 17 Apr 2026 16:38:04 +0200
Subject: [PATCH] feat(eval): add edit-preflight discovery lane

---
 scripts/benchmark-comparators.mjs             | 465 +++++++++++++++---
 scripts/lib/managed-mcp-session.mjs           |   2 +-
 scripts/run-eval.mjs                          |  88 +++-
 src/eval/edit-preflight-harness.ts            | 266 ++++++++++
 src/eval/run-config.ts                        |  37 ++
 src/eval/types.ts                             |  96 ++++
 tests/benchmark-comparators.test.ts           |  99 ++++
 tests/edit-preflight-harness.test.ts          | 243 +++++++++
 tests/fixtures/README.md                      |  32 +-
 .../edit-preflight-angular-spotify.json       |  93 ++++
 tests/fixtures/edit-preflight-excalidraw.json |  93 ++++
 tests/run-eval-config.test.ts                 |  25 +
 12 files changed, 1459 insertions(+), 80 deletions(-)
 create mode 100644 src/eval/edit-preflight-harness.ts
 create mode 100644 src/eval/run-config.ts
 create mode 100644 tests/edit-preflight-harness.test.ts
 create mode 100644 tests/fixtures/edit-preflight-angular-spotify.json
 create mode 100644 tests/fixtures/edit-preflight-excalidraw.json
 create mode 100644 tests/run-eval-config.test.ts

diff --git a/scripts/benchmark-comparators.mjs b/scripts/benchmark-comparators.mjs
index 0fbe5ca..e0a8e37 100644
--- a/scripts/benchmark-comparators.mjs
+++ b/scripts/benchmark-comparators.mjs
@@ -1,4 +1,3 @@
-#!/usr/bin/env node
 /**
  * Automated comparator benchmark runner for codebase-context discovery benchmark.
  *
@@ -12,7 +11,7 @@
  */
 
 import path from 'path';
-import { fileURLToPath } from 'url';
+import { fileURLToPath, pathToFileURL } from 'url';
 import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
 import { execSync, execFile } from 'child_process';
 import { parseArgs } from 'util';
@@ -51,6 +50,224 @@ function normalizeText(value) {
   return value.toLowerCase().replace(/\\/g, '/');
 }
 
+function normalizeRelativePath(candidate) { // Cleans a path string; null for non-strings and blank input.
+  if (typeof candidate !== 'string') return null;
+  const trimmed = candidate.trim().replace(/^["']|["']$/g, '');
+  if (!trimmed) return null; // nothing left after trimming and unquoting
+  const normalized = trimmed.replace(/\\/g, '/').replace(/^\.\//, '');
+  if (/^[A-Za-z]:\//.test(normalized)) {
+    return normalized.replace(/^[A-Za-z]:\//, '');
+  }
+  return normalized; // forward slashes, with one leading dot-slash removed
+}
+
+function normalizeFilesystemPath(candidate) { // Comparison key: trimmed, forward slashes, no trailing slash, lowercase.
+  if (typeof candidate !== 'string') return null; // non-strings have no comparable form
+  return candidate.trim().replace(/\\/g, '/').replace(/\/+$/, '').toLowerCase();
+}
+
+function isLikelyCodePath(candidate) { // True only for strings with a directory part and a file extension.
+  if (typeof candidate !== 'string') return false;
+  if (!candidate.includes('/')) return false; // bare file names are rejected
+  const lastSegment = candidate.split('/').pop() ?? '';
+  return /\.[A-Za-z0-9]+$/.test(lastSegment);
+}
+
+function collectTopFiles(value, sink = []) { // Gathers unique path strings from nested data into sink, in visit order.
+  if (Array.isArray(value)) {
+    for (const item of value) {
+      collectTopFiles(item, sink);
+    }
+    return sink;
+  }
+
+  if (value && typeof value === 'object') {
+    for (const [key, nested] of Object.entries(value)) {
+      if (
+        (key === 'file' || key === 'filePath' || key === 'path' || key === 'source') &&
+        typeof nested === 'string'
+      ) { // string values under these keys are treated as one whole path
+        const normalized = normalizeRelativePath(nested);
+        if (normalized && isLikelyCodePath(normalized) && !sink.includes(normalized)) {
+          sink.push(normalized);
+        }
+      }
+      collectTopFiles(nested, sink); // always descend; string values also reach the regex scan below
+    }
+    return sink;
+  }
+
+  if (typeof value === 'string') { // free text: pull out slash-separated tokens that end in an extension
+    const matches = value.match(/[A-Za-z0-9_.-]+(?:\/[A-Za-z0-9_.-]+)+\.[A-Za-z0-9]+/g) ?? [];
+    for (const match of matches) {
+      const normalized = normalizeRelativePath(match);
+      if (normalized && !sink.includes(normalized)) {
+        sink.push(normalized);
+      }
+    }
+  }
+
+  return sink; // other primitives contribute nothing
+}
+
+function extractBestExample(value) { // Depth-first search for the first string under a best-example style key; null if none.
+  if (!value || typeof value !== 'object') return null; // primitives cannot hold keys
+  if (Array.isArray(value)) {
+    for (const item of value) {
+      const candidate = extractBestExample(item);
+      if (candidate) return candidate;
+    }
+    return null;
+  }
+
+  for (const [key, nested] of Object.entries(value)) {
+    if (
+      (key === 'bestExample' || key === 'best_example' || key === 'goldenFile' || key === 'example') &&
+      typeof nested === 'string'
+    ) {
+      const normalized = normalizeRelativePath(nested);
+      if (normalized) return normalized; // first usable hit wins
+    }
+    const candidate = extractBestExample(nested);
+    if (candidate) return candidate;
+  }
+
+  return null;
+}
+
+function extractPayloadText(result) { // Joins text content items and pretty-printed structuredContent with newlines.
+  const parts = [];
+  if (Array.isArray(result?.content)) {
+    for (const item of result.content) {
+      if (typeof item?.text === 'string' && item.text.trim()) {
+        parts.push(item.text.trim()); // blank text items are skipped
+      }
+    }
+  }
+  if (result?.structuredContent !== undefined) {
+    parts.push(JSON.stringify(result.structuredContent, null, 2));
+  }
+  if (parts.length === 0) { // nothing usable found: fall back to the whole result serialized
+    parts.push(JSON.stringify(result));
+  }
+  return parts.join('\n');
+}
+
+function extractMcpResponse(result) { // Builds { payload, topFiles?, bestExample? } from a tool result.
+  const topFiles = collectTopFiles(result?.structuredContent ?? result); // prefer structuredContent, else scan the whole result
+  const bestExample = extractBestExample(result?.structuredContent ?? result) ?? topFiles[0] ?? null;
+  return {
+    payload: extractPayloadText(result),
+    ...(topFiles.length > 0 && { topFiles }), // key omitted when no files were found
+    ...(bestExample && { bestExample })
+  };
+}
+
+function parseToolTextPayload(result) { // Joins the trimmed, non-empty text items of result.content; empty string if none.
+  const textParts = Array.isArray(result?.content)
+    ? result.content
+        .map((item) => (typeof item?.text === 'string' ? item.text.trim() : ''))
+        .filter(Boolean)
+    : [];
+  return textParts.join('\n'); // no JSON parsing happens here; callers parse the joined text
+}
+
+function extractIndexedProjectName(listProjectsResult, rootPath) {
+  const payload = parseToolTextPayload(listProjectsResult);
+  if (!payload) return null;
+
+  try {
+    const parsed = JSON.parse(payload);
+    const projects = Array.isArray(parsed.projects) ? parsed.projects : [];
+    const normalizedRootPath = normalizeFilesystemPath(rootPath);
+    const match = projects.find(
+      (project) => normalizeFilesystemPath(project.root_path) === normalizedRootPath
+    );
+    return typeof match?.name === 'string' ? match.name : null;
+  } catch {
+    return null;
+  }
+}
+
+function matchPatterns(candidates, patterns) { // 1-based rank of the first candidate containing any pattern, else null.
+  if (!patterns || patterns.length === 0) return null; // nothing to look for
+  const normalizedPatterns = patterns.map(normalizeText);
+  for (let index = 0; index < candidates.length; index++) {
+    const normalizedCandidate = normalizeText(candidates[index]);
+    if (normalizedPatterns.some((pattern) => normalizedCandidate.includes(pattern))) {
+      return index + 1; // ranks start at 1
+    }
+  }
+  return null;
+}
+
+export function buildRawClaudePrompt(task, rootPath) { // Builds the newline-joined, JSON-only exploration prompt for one task.
+  const query = task.args?.query ?? task.prompt; // an explicit query argument wins over the task prompt
+  const intent =
+    task.surface === 'search_codebase'
+      ? 'search'
+      : task.surface === 'get_team_patterns'
+        ? 'find local conventions'
+        : 'map/orient to the repository';
+
+  return [
+    `You are exploring a codebase at ${path.resolve(rootPath)}.`,
+    `Use only Read, Grep, and Glob tools to ${intent}.`,
+    `Question: ${query}`,
+    'Return strict JSON with this shape:',
+    '{"answer":"short concrete answer with repo terms","files":["repo-relative path in relevance order"],"bestExample":"repo-relative path or null"}',
+    'Rules:',
+    '- files must be repo-relative and ordered most relevant first',
+    '- answer must include concrete identifiers, files, or patterns from the repo, not generic advice',
+    '- bestExample must be the strongest local example if one exists, otherwise null',
+    '- Output JSON only'
+  ].join('\n');
+}
+
+export function parseRawClaudeStructuredResult(resultText) { // Parses a model reply into { payload, topFiles?, bestExample? }.
+  const topFiles = [];
+  let bestExample = null;
+  let payload = resultText; // kept as-is unless the reply parses to a JSON object
+  const trimmed = typeof resultText === 'string' ? resultText.trim() : '';
+  const fencedJsonMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
+  const candidateJson = fencedJsonMatch ? fencedJsonMatch[1].trim() : trimmed; // unwrap a reply that is one fenced block
+
+  try {
+    const parsed = JSON.parse(candidateJson);
+    if (parsed && typeof parsed === 'object') {
+      if (Array.isArray(parsed.files)) {
+        for (const file of parsed.files) {
+          const normalized = normalizeRelativePath(file);
+          if (normalized && isLikelyCodePath(normalized) && !topFiles.includes(normalized)) {
+            topFiles.push(normalized);
+          }
+        }
+      }
+      const normalizedBestExample = normalizeRelativePath(parsed.bestExample);
+      if (normalizedBestExample) {
+        bestExample = normalizedBestExample;
+      } else if (topFiles.length > 0) { // no explicit best example: default to the top ranked file
+        bestExample = topFiles[0];
+      }
+      payload = JSON.stringify(parsed); // compact re-serialization of the parsed reply
+    }
+  } catch { // not JSON: scan the raw reply for path-like tokens instead
+    const fallbackFiles = collectTopFiles(resultText);
+    for (const file of fallbackFiles) {
+      if (!topFiles.includes(file)) {
+        topFiles.push(file);
+      }
+    }
+    bestExample = topFiles[0] ?? null;
+  }
+
+  return {
+    payload,
+    ...(topFiles.length > 0 && { topFiles }), // keys are omitted rather than set to empty values
+    ...(bestExample && { bestExample })
+  };
+}
+
 function matchSignals(payload, expectedSignals, forbiddenSignals) {
   const normalizedPayload = normalizeText(payload);
   const matchedSignals = expectedSignals.filter((s) =>
@@ -124,16 +341,26 @@ const COMPARATOR_ADAPTERS = [
     serverArgs: ['--yes', 'codebase-memory-mcp'],
     serverEnv: {},
     initTimeout: 10000,
+    resolveProjectName: true,
     indexTool: null, // auto-indexes on first query
-    searchTool: 'search_code',
-    searchArgs(task) {
-      return { query: task.prompt, mode: 'compact' };
-    },
-    extractPayload(result) {
-      if (Array.isArray(result?.content)) {
-        return result.content.map((c) => (typeof c?.text === 'string' ? c.text : JSON.stringify(c))).join('\n');
+    buildTaskCall(task, { projectName }) {
+      const query = task.args?.query ?? task.prompt;
+      if (task.job === 'map') {
+        return {
+          name: 'get_architecture',
+          arguments: { project: projectName }
+        };
       }
-      return JSON.stringify(result);
+
+      return {
+        name: 'search_graph',
+        arguments: {
+          project: projectName,
+          query,
+          include_connected: true,
+          limit: 10
+        }
+      };
     }
   },
   {
@@ -170,12 +397,7 @@ const COMPARATOR_ADAPTERS = [
         detail_level: 'compact'
       };
     },
-    extractPayload(result) {
-      if (Array.isArray(result?.content)) {
-        return result.content.map((c) => (typeof c?.text === 'string' ? c.text : JSON.stringify(c))).join('\n');
-      }
-      return JSON.stringify(result);
-    }
+    extractPayload: null
   },
   {
     name: 'GrepAI',
@@ -208,12 +430,7 @@ const COMPARATOR_ADAPTERS = [
     searchArgs(task) {
       return { query: task.prompt };
     },
-    extractPayload(result) {
-      if (Array.isArray(result?.content)) {
-        return result.content.map((c) => (typeof c?.text === 'string' ? c.text : JSON.stringify(c))).join('\n');
-      }
-      return JSON.stringify(result);
-    }
+    extractPayload: null
   },
   {
     name: 'CodeGraphContext',
@@ -249,12 +466,7 @@ const COMPARATOR_ADAPTERS = [
       // CodeGraphContext uses cypher-based queries; approximate with a search tool
       return { query: task.prompt };
     },
-    extractPayload(result) {
-      if (Array.isArray(result?.content)) {
-        return result.content.map((c) => (typeof c?.text === 'string' ? c.text : JSON.stringify(c))).join('\n');
-      }
-      return JSON.stringify(result);
-    }
+    extractPayload: null
   },
   {
     name: 'raw Claude Code',
@@ -281,9 +493,7 @@ const COMPARATOR_ADAPTERS = [
     searchArgs(task) {
       return { prompt: task.prompt };
     },
-    extractPayload(result) {
-      return typeof result === 'string' ? result : JSON.stringify(result);
-    }
+    extractPayload: null
   }
 ];
 
@@ -297,6 +507,7 @@ async function runComparatorViaMcp(adapter, rootPath, tasks) {
       serverCommand: adapter.serverCommand,
       serverArgs: adapter.serverArgs,
       serverEnv: adapter.serverEnv,
+      cwd: path.resolve(rootPath),
       connectTimeoutMs: adapter.connectTimeout ?? 15_000
     },
     async ({ client }) => {
@@ -312,6 +523,25 @@ async function runComparatorViaMcp(adapter, rootPath, tasks) {
         throw new Error(`Failed to list tools from ${adapter.name}: ${err.message}`);
       }
 
+      let projectName = null;
+      if (adapter.resolveProjectName && availableTools.some((tool) => tool.name === 'list_projects')) {
+        try {
+          const listProjectsResult = await client.callTool({
+            name: 'list_projects',
+            arguments: {}
+          });
+          projectName = extractIndexedProjectName(listProjectsResult, rootPath);
+        } catch (err) {
+          throw new Error(`Failed to resolve indexed project for ${adapter.name}: ${err.message}`);
+        }
+
+        if (!projectName) {
+          throw new Error(
+            `Could not resolve indexed project for ${adapter.name} at ${path.resolve(rootPath)}`
+          );
+        }
+      }
+
       const toolNames = availableTools.map((t) => t.name);
       let searchToolName = adapter.searchTool;
       if (!searchToolName) {
@@ -348,15 +578,33 @@ async function runComparatorViaMcp(adapter, rootPath, tasks) {
       for (const task of tasks) {
         const startMs = Date.now();
         let payload = '';
+        let topFiles = [];
+        let bestExample = null;
         let toolCallCount = totalToolCalls;
 
         try {
-          const result = await client.callTool({
-            name: searchToolName,
-            arguments: adapter.searchArgs(task)
-          });
+          const request =
+            typeof adapter.buildTaskCall === 'function'
+              ? adapter.buildTaskCall(task, { rootPath, projectName, toolNames })
+              : {
+                  name: searchToolName,
+                  arguments: adapter.searchArgs(task)
+                };
+          const result = await client.callTool(request);
           toolCallCount++;
-          payload = adapter.extractPayload(result);
+          const extracted =
+            typeof adapter.extractPayload === 'function'
+              ? adapter.extractPayload(result)
+              : extractMcpResponse(result);
+          payload = typeof extracted === 'string' ? extracted : extracted.payload;
+          topFiles =
+            extracted && typeof extracted === 'object' && Array.isArray(extracted.topFiles)
+              ? extracted.topFiles
+              : [];
+          bestExample =
+            extracted && typeof extracted === 'object' && typeof extracted.bestExample === 'string'
+              ? extracted.bestExample
+              : topFiles[0] ?? null;
         } catch (err) {
           console.warn(`  [${adapter.name}] Task ${task.id} failed: ${err.message}`);
           payload = '';
@@ -370,6 +618,13 @@ async function runComparatorViaMcp(adapter, rootPath, tasks) {
           task.expectedSignals,
           task.forbiddenSignals
         );
+        const firstRelevantHit = matchPatterns(topFiles, task.expectedFilePatterns);
+        const bestExampleUseful =
+          task.expectedBestExamplePatterns && task.expectedBestExamplePatterns.length > 0
+            ? task.expectedBestExamplePatterns.some((pattern) =>
+                normalizeText(bestExample ?? '').includes(normalizeText(pattern))
+              )
+            : undefined;
 
         taskResults.push({
           taskId: task.id,
@@ -381,7 +636,9 @@ async function runComparatorViaMcp(adapter, rootPath, tasks) {
           payloadBytes,
           estimatedTokens,
           toolCallCount,
-          elapsedMs
+          elapsedMs,
+          ...(firstRelevantHit !== null ? { firstRelevantHit } : {}),
+          ...(typeof bestExampleUseful === 'boolean' ? { bestExampleUseful } : {})
         });
       }
 
@@ -406,25 +663,76 @@ async function runRawClaudeCode(rootPath, tasks) {
   for (const task of tasks) {
     const startMs = Date.now();
     let payload = '';
+    let topFiles = [];
+    let bestExample = null;
 
     try {
-      const prompt = `You are exploring a codebase at ${path.resolve(rootPath)}. Answer this question using only grep, glob, and read file operations: ${task.prompt}`;
-      const { stdout } = await execFileAsync(
-        'claude',
-        ['-p', prompt, '--output-format', 'json', '--allowedTools', 'Read,Grep,Glob'],
-        { timeout: 120000, cwd: path.resolve(rootPath), shell: process.platform === 'win32' }
-      );
+      const prompt = buildRawClaudePrompt(task, rootPath);
+      const commandArgs =
+        process.platform === 'win32'
+          ? [
+              'powershell.exe',
+              [
+                '-NoProfile',
+                '-Command',
+                'claude -p $env:CLAUDE_BENCHMARK_PROMPT --model haiku --effort low --output-format json --allowedTools Read,Grep,Glob'
+              ],
+              {
+                timeout: 120000,
+                cwd: path.resolve(rootPath),
+                windowsHide: true,
+                env: {
+                  ...process.env,
+                  CLAUDE_BENCHMARK_PROMPT: prompt
+                }
+              }
+            ]
+          : [
+              'claude',
+              ['-p', prompt, '--model', 'haiku', '--effort', 'low', '--output-format', 'json', '--allowedTools', 'Read,Grep,Glob'],
+              {
+                timeout: 120000,
+                cwd: path.resolve(rootPath),
+                windowsHide: true
+              }
+            ];
+      const { stdout } = await execFileAsync(commandArgs[0], commandArgs[1], commandArgs[2]);
       try {
         const parsed = JSON.parse(stdout);
-        payload = parsed.result ?? stdout;
+        const extracted = parseRawClaudeStructuredResult(parsed.result ?? stdout);
+        payload = extracted.payload;
+        topFiles = extracted.topFiles ?? [];
+        bestExample = extracted.bestExample ?? null;
       } catch {
-        payload = stdout;
+        const extracted = parseRawClaudeStructuredResult(stdout);
+        payload = extracted.payload;
+        topFiles = extracted.topFiles ?? [];
+        bestExample = extracted.bestExample ?? null;
       }
     } catch (err) {
       if (err.code === 'ENOENT' || err.message?.includes('command not found')) {
         throw new Error('claude CLI not found');
       }
-      console.warn(`  [raw Claude Code] Task ${task.id} error: ${err.message}`);
+      const fallbackStdout = typeof err.stdout === 'string' ? err.stdout.trim() : '';
+      if (fallbackStdout) {
+        try {
+          const parsed = JSON.parse(fallbackStdout);
+          const extracted = parseRawClaudeStructuredResult(parsed.result ?? fallbackStdout);
+          payload = extracted.payload;
+          topFiles = extracted.topFiles ?? [];
+          bestExample = extracted.bestExample ?? null;
+        } catch {
+          const extracted = parseRawClaudeStructuredResult(fallbackStdout);
+          payload = extracted.payload;
+          topFiles = extracted.topFiles ?? [];
+          bestExample = extracted.bestExample ?? null;
+        }
+      }
+
+      if (!payload) {
+        const stderr = typeof err.stderr === 'string' ? err.stderr.trim() : '';
+        console.warn(`  [raw Claude Code] Task ${task.id} error: ${stderr || err.message}`);
+      }
     }
 
     const elapsedMs = Date.now() - startMs;
@@ -435,6 +743,13 @@ async function runRawClaudeCode(rootPath, tasks) {
       task.expectedSignals,
       task.forbiddenSignals
     );
+    const firstRelevantHit = matchPatterns(topFiles, task.expectedFilePatterns);
+    const bestExampleUseful =
+      task.expectedBestExamplePatterns && task.expectedBestExamplePatterns.length > 0
+        ? task.expectedBestExamplePatterns.some((pattern) =>
+            normalizeText(bestExample ?? '').includes(normalizeText(pattern))
+          )
+        : undefined;
 
     taskResults.push({
       taskId: task.id,
@@ -446,7 +761,9 @@ async function runRawClaudeCode(rootPath, tasks) {
       payloadBytes,
       estimatedTokens,
       toolCallCount: null,
-      elapsedMs
+      elapsedMs,
+      ...(firstRelevantHit !== null ? { firstRelevantHit } : {}),
+      ...(typeof bestExampleUseful === 'boolean' ? { bestExampleUseful } : {})
     });
   }
 
@@ -457,26 +774,56 @@ async function runRawClaudeCode(rootPath, tasks) {
 // Aggregate task results into DiscoveryComparatorMetrics shape
 // ---------------------------------------------------------------------------
 
-function aggregateResults(taskResults) {
+export function aggregateResults(taskResults) { // Averages per-task results into comparator metrics plus a status.
   const n = taskResults.length;
-  if (n === 0) return { averageUsefulness: null, averagePayloadBytes: null, averageEstimatedTokens: null, averageFirstRelevantHit: null, bestExampleUsefulnessRate: null };
+  if (n === 0) { // no task results at all: null metrics with a pending status
+    return {
+      averageUsefulness: null,
+      averagePayloadBytes: null,
+      averageEstimatedTokens: null,
+      averageFirstRelevantHit: null,
+      bestExampleUsefulnessRate: null,
+      status: 'pending_evidence',
+      reason: 'No comparator task results were produced'
+    };
+  }
 
   const avgUsefulness = taskResults.reduce((s, r) => s + r.usefulnessScore, 0) / n;
   const avgBytes = taskResults.reduce((s, r) => s + r.payloadBytes, 0) / n;
   const avgTokens = taskResults.reduce((s, r) => s + r.estimatedTokens, 0) / n;
+  const searchHits = taskResults
+    .map((r) => r.firstRelevantHit)
+    .filter((value) => typeof value === 'number'); // tasks without a ranked hit are left out of the average
+  const bestExampleResults = taskResults
+    .map((r) => r.bestExampleUseful)
+    .filter((value) => typeof value === 'boolean'); // only tasks that recorded a boolean count toward the rate
 
   const toolCallCounts = taskResults.map((r) => r.toolCallCount).filter((v) => typeof v === 'number');
   const elapsedMsList = taskResults.map((r) => r.elapsedMs).filter((v) => typeof v === 'number');
+  const hasMeaningfulEvidence = taskResults.some( // any positive score, ranked hit, or useful best example
+    (result) =>
+      result.usefulnessScore > 0 ||
+      typeof result.firstRelevantHit === 'number' ||
+      result.bestExampleUseful === true
+  );
+  const status = hasMeaningfulEvidence ? 'ok' : 'pending_evidence';
 
   return {
     averageUsefulness: avgUsefulness,
     averagePayloadBytes: avgBytes,
     averageEstimatedTokens: avgTokens,
-    averageFirstRelevantHit: null, // comparators don't expose ranked file lists in standard MCP responses
-    bestExampleUsefulnessRate: null,
+    averageFirstRelevantHit:
+      searchHits.length > 0 ? searchHits.reduce((sum, value) => sum + value, 0) / searchHits.length : null,
+    bestExampleUsefulnessRate:
+      bestExampleResults.length > 0
+        ? bestExampleResults.filter(Boolean).length / bestExampleResults.length
+        : null,
     averageToolCallCount: toolCallCounts.length > 0 ? toolCallCounts.reduce((s, v) => s + v, 0) / toolCallCounts.length : null,
     averageElapsedMs: elapsedMsList.length > 0 ? elapsedMsList.reduce((s, v) => s + v, 0) / elapsedMsList.length : null,
-    status: 'ok',
+    status,
+    ...(status === 'pending_evidence' // reason is attached only when the status is not ok
+      ? { reason: 'Comparator returned task payloads, but none contained usable benchmark evidence' }
+      : {}),
     taskResults
   };
 }
@@ -680,7 +1027,13 @@ async function main() {
   }
 }
 
-main().catch((err) => {
-  console.error('Fatal:', err);
-  process.exit(2);
-});
+const isMain = // truthy only when this module URL equals the resolved path of the script given to node
+  process.argv[1] &&
+  import.meta.url === pathToFileURL(path.resolve(process.argv[1])).href;
+
+if (isMain) { // main is not started when the URLs differ, e.g. when the file is imported
+  main().catch((err) => {
+    console.error('Fatal:', err);
+    process.exit(2);
+  });
+}
diff --git a/scripts/lib/managed-mcp-session.mjs b/scripts/lib/managed-mcp-session.mjs
index 5f54e55..97c8106 100644
--- a/scripts/lib/managed-mcp-session.mjs
+++ b/scripts/lib/managed-mcp-session.mjs
@@ -2,7 +2,7 @@ import process from 'node:process';
 
 async function loadSdkClient() {
   const [{ Client }, { StdioClientTransport }] = await Promise.all([
-    import('@modelcontextprotocol/sdk/client/index.js'),
+    import('@modelcontextprotocol/sdk/client'),
     import('@modelcontextprotocol/sdk/client/stdio.js')
   ]);
 
diff --git a/scripts/run-eval.mjs b/scripts/run-eval.mjs
index ef82204..6f5b706 100644
--- a/scripts/run-eval.mjs
+++ b/scripts/run-eval.mjs
@@ -11,12 +11,18 @@ import { analyzerRegistry } from '../dist/core/analyzer-registry.js';
 import { AngularAnalyzer } from '../dist/analyzers/angular/index.js';
 import { GenericAnalyzer } from '../dist/analyzers/generic/index.js';
 import { evaluateFixture, formatEvalReport } from '../dist/eval/harness.js';
+import {
+  combineEditPreflightSummaries,
+  evaluateEditPreflightFixture,
+  formatEditPreflightReport
+} from '../dist/eval/edit-preflight-harness.js';
 import {
   combineDiscoverySummaries,
   evaluateDiscoveryGate,
   evaluateDiscoveryFixture,
   formatDiscoveryReport
 } from '../dist/eval/discovery-harness.js';
+import { getDefaultFixturePaths, resolveEvalMode } from '../dist/eval/run-config.js';
 
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const projectRoot = path.join(__dirname, '..');
@@ -24,20 +30,6 @@ const packageJsonPath = path.join(projectRoot, 'package.json');
 
 const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
 
-const defaultFixtureA = path.join(projectRoot, 'tests', 'fixtures', 'eval-angular-spotify.json');
-const defaultFixtureB = path.join(projectRoot, 'tests', 'fixtures', 'eval-controlled.json');
-const defaultDiscoveryFixtureA = path.join(
-  projectRoot,
-  'tests',
-  'fixtures',
-  'discovery-angular-spotify.json'
-);
-const defaultDiscoveryFixtureB = path.join(
-  projectRoot,
-  'tests',
-  'fixtures',
-  'discovery-excalidraw.json'
-);
 const defaultDiscoveryProtocol = path.join(
   projectRoot,
   'tests',
@@ -49,7 +41,7 @@ const usage = [
   `Usage: node scripts/run-eval.mjs <codebaseA> [codebaseB] [options]`,
   ``,
   `Options:`,
-  `  --mode=<retrieval|discovery>  Select benchmark mode (default: retrieval)`,
+  `  --mode=<retrieval|discovery|edit-preflight>  Select benchmark mode (default: retrieval)`,
   `  --fixture-a=<path>  Override fixture for codebaseA`,
   `  --fixture-b=<path>  Override fixture for codebaseB`,
   `  --protocol=<path>   Override discovery benchmark protocol`,
@@ -151,6 +143,17 @@ async function runSingleEvaluation({
       fixturePath: resolvedFixture,
       summary
     });
+  } else if (mode === 'edit-preflight') {
+    console.log(`\n--- Phase 2: Running ${fixture.tasks.length}-task edit-preflight harness ---`);
+    summary = await evaluateEditPreflightFixture({
+      fixture,
+      rootPath: resolvedCodebase
+    });
+    report = formatEditPreflightReport({
+      codebaseLabel: label,
+      fixturePath: resolvedFixture,
+      summary
+    });
   } else {
     console.log(`\n--- Phase 2: Running ${fixture.queries.length}-query eval harness ---`);
     const searcher = new CodebaseSearcher(resolvedCodebase);
@@ -202,6 +205,31 @@ function printCombinedSummary(summaries, mode) {
     return;
   }
 
+  if (mode === 'edit-preflight') {
+    const combined = combineEditPreflightSummaries(summaries);
+    console.log(`\n=== Combined Edit Preflight Summary ===`);
+    console.log(
+      `Top-target in top-3: ${combined.topTargetInTop3Count}/${combined.targetableTasks} (${combined.topTargetInTop3Rate === null ? 'n/a' : (combined.topTargetInTop3Rate * 100).toFixed(0) + '%'})`
+    );
+    console.log(
+      `Average first relevant hit: ${combined.averageFirstRelevantHit === null ? 'n/a' : combined.averageFirstRelevantHit.toFixed(2)}`
+    );
+    console.log(
+      `Best-example hit rate: ${combined.bestExampleHitCount}/${combined.bestExampleTasks} (${combined.bestExampleHitRate === null ? 'n/a' : (combined.bestExampleHitRate * 100).toFixed(0) + '%'})`
+    );
+    console.log(
+      `Safe ready rate: ${combined.safeTaskReadyCount}/${combined.safeTasks} (${combined.safeTaskReadyRate === null ? 'n/a' : (combined.safeTaskReadyRate * 100).toFixed(0) + '%'})`
+    );
+    console.log(
+      `Unsafe abstain rate: ${combined.unsafeTaskAbstainCount}/${combined.unsafeTasks} (${combined.unsafeTaskAbstainRate === null ? 'n/a' : (combined.unsafeTaskAbstainRate * 100).toFixed(0) + '%'})`
+    );
+    console.log(
+      `Unsafe ready=true false positives: ${combined.unsafeReadyFalsePositiveCount}/${combined.unsafeTasks} (${combined.unsafeReadyFalsePositiveRate === null ? 'n/a' : (combined.unsafeReadyFalsePositiveRate * 100).toFixed(0) + '%'})`
+    );
+    console.log(`=======================================\n`);
+    return;
+  }
+
   const total = summaries.reduce((sum, summary) => sum + summary.total, 0);
   const top1Correct = summaries.reduce((sum, summary) => sum + summary.top1Correct, 0);
   const top3RecallCount = summaries.reduce((sum, summary) => sum + summary.top3RecallCount, 0);
@@ -254,17 +282,14 @@ async function main() {
 
   const codebaseA = positionals[0];
   const codebaseB = positionals[1];
-  const mode = values.mode === 'discovery' ? 'discovery' : 'retrieval';
+  const mode = resolveEvalMode(values.mode);
+  const defaultFixtures = getDefaultFixturePaths(projectRoot, mode);
   const fixtureA = values['fixture-a']
     ? path.resolve(values['fixture-a'])
-    : mode === 'discovery'
-      ? defaultDiscoveryFixtureA
-      : defaultFixtureA;
+    : defaultFixtures.fixtureA;
   const fixtureB = values['fixture-b']
     ? path.resolve(values['fixture-b'])
-    : mode === 'discovery'
-      ? defaultDiscoveryFixtureB
-      : defaultFixtureB;
+    : defaultFixtures.fixtureB;
   const protocolPath = values.protocol
     ? path.resolve(values.protocol)
     : defaultDiscoveryProtocol;
@@ -326,6 +351,25 @@ async function main() {
     process.exit(gate.status === 'failed' ? 1 : 0);
   }
 
+  if (mode === 'edit-preflight') {
+    const combinedSummary = combineEditPreflightSummaries(summaries);
+    printCombinedSummary(summaries, mode);
+    console.log(
+      formatEditPreflightReport({
+        codebaseLabel: 'combined-suite',
+        fixturePath: codebaseB ? `${fixtureA}, ${fixtureB}` : fixtureA,
+        summary: combinedSummary
+      })
+    );
+    if (outputPath) {
+      const outputDir = path.dirname(outputPath);
+      if (!existsSync(outputDir)) mkdirSync(outputDir, { recursive: true });
+      writeFileSync(outputPath, JSON.stringify(combinedSummary, null, 2));
+      console.log(`\nResults written to: ${outputPath}`);
+    }
+    process.exit(0);
+  }
+
   if (outputPath && mode === 'discovery' && summaries.length === 1) {
     const outputDir = path.dirname(outputPath);
     if (!existsSync(outputDir)) mkdirSync(outputDir, { recursive: true });
diff --git a/src/eval/edit-preflight-harness.ts b/src/eval/edit-preflight-harness.ts
new file mode 100644
index 0000000..4f34588
--- /dev/null
+++ b/src/eval/edit-preflight-harness.ts
@@ -0,0 +1,266 @@
+import { createProjectState } from '../project-state.js';
+import { handle as searchCodebaseHandle } from '../tools/search-codebase.js';
+import type {
+  EditPreflightFixture,
+  EditPreflightResponse,
+  EditPreflightRunner,
+  EditPreflightSummary,
+  EditPreflightTask,
+  EditPreflightTaskResult,
+  EvaluateEditPreflightFixtureParams,
+  FormatEditPreflightReportParams
+} from './types.js';
+
+function normalizeText(value: string): string { // comparison key for file paths and patterns
+  return value.toLowerCase().replace(/\\/g, '/'); // lowercase; every backslash becomes "/"
+}
+
+function stripLocationSuffix(fileRef: string): string { // e.g. "a.ts:1-20" -> "a.ts"
+  return fileRef.replace(/:(\d+)(?:-\d+)?$/, ''); // removes one trailing ":N" or ":N-M"
+}
+
+function matchesPatterns(candidate: string, patterns: string[] | undefined): boolean {
+  if (!patterns || patterns.length === 0) { // no patterns: nothing can match
+    return false;
+  }
+
+  const normalizedCandidate = normalizeText(candidate); // one normalized substring hit is enough
+  return patterns.some((pattern) => normalizedCandidate.includes(normalizeText(pattern)));
+}
+
+function findFirstRelevantHit(topFiles: string[], patterns: string[] | undefined): number | null {
+  if (!patterns || patterns.length === 0) { // task declares no expected targets
+    return null;
+  }
+
+  for (let index = 0; index < topFiles.length; index++) {
+    if (matchesPatterns(topFiles[index], patterns)) {
+      return index + 1; // 1-based rank of the first matching file
+    }
+  }
+
+  return null; // no entry in topFiles matched
+}
+
+function summarizeEditPreflightResults(results: EditPreflightTaskResult[]): EditPreflightSummary {
+  const totalTasks = results.length;
+  const safeResults = results.filter((result) => result.risk === 'safe');
+  const unsafeResults = results.filter((result) => result.risk === 'unsafe');
+  const targetableResults = results.filter((result) => result.topTargetInTop3 !== null); // null = not scored
+  const bestExampleResults = results.filter((result) => result.bestExampleHit !== null); // null = not scored
+  const firstRelevantHits = results
+    .map((result) => result.firstRelevantHit)
+    .filter((value): value is number => typeof value === 'number'); // tasks without a hit are left out
+
+  const topTargetInTop3Count = targetableResults.filter((result) => result.topTargetInTop3).length;
+  const bestExampleHitCount = bestExampleResults.filter((result) => result.bestExampleHit).length;
+  const safeTaskReadyCount = safeResults.filter((result) => result.ready).length;
+  const unsafeTaskAbstainCount = unsafeResults.filter((result) => result.abstain).length;
+  const unsafeReadyFalsePositiveCount = unsafeResults.filter((result) => result.ready).length;
+
+  return { // every rate and average below is null when its denominator is zero
+    totalTasks,
+    safeTasks: safeResults.length,
+    unsafeTasks: unsafeResults.length,
+    targetableTasks: targetableResults.length,
+    bestExampleTasks: bestExampleResults.length,
+    topTargetInTop3Count,
+    topTargetInTop3Rate:
+      targetableResults.length > 0 ? topTargetInTop3Count / targetableResults.length : null,
+    averageFirstRelevantHit:
+      firstRelevantHits.length > 0
+        ? firstRelevantHits.reduce((sum, value) => sum + value, 0) / firstRelevantHits.length
+        : null,
+    bestExampleHitCount,
+    bestExampleHitRate:
+      bestExampleResults.length > 0 ? bestExampleHitCount / bestExampleResults.length : null,
+    safeTaskReadyCount,
+    safeTaskReadyRate: safeResults.length > 0 ? safeTaskReadyCount / safeResults.length : null,
+    unsafeTaskAbstainCount,
+    unsafeTaskAbstainRate:
+      unsafeResults.length > 0 ? unsafeTaskAbstainCount / unsafeResults.length : null,
+    unsafeReadyFalsePositiveCount,
+    unsafeReadyFalsePositiveRate:
+      unsafeResults.length > 0 ? unsafeReadyFalsePositiveCount / unsafeResults.length : null,
+    results
+  };
+}
+
+function evaluateTask(task: EditPreflightTask, response: EditPreflightResponse): EditPreflightTaskResult {
+  const topFiles = (response.results ?? [])
+    .map((result) => (typeof result.file === 'string' ? stripLocationSuffix(result.file) : ''))
+    .filter((filePath): filePath is string => Boolean(filePath)); // drop results without a usable file
+  const firstRelevantHit = findFirstRelevantHit(topFiles, task.expectedTargetPatterns);
+  const bestExample =
+    typeof response.preflight?.bestExample === 'string' ? response.preflight.bestExample : null;
+  const bestExampleHit = // null when the task has no expected best-example patterns
+    task.expectedBestExamplePatterns && task.expectedBestExamplePatterns.length > 0
+      ? bestExample !== null && matchesPatterns(bestExample, task.expectedBestExamplePatterns)
+      : null;
+
+  return {
+    taskId: task.id,
+    title: task.title,
+    query: task.query,
+    risk: task.risk,
+    ready: response.preflight?.ready === true, // anything other than literal true counts as false
+    abstain: response.preflight?.abstain === true,
+    searchQualityStatus: response.searchQuality?.status ?? 'unknown',
+    topFiles,
+    firstRelevantHit,
+    topTargetInTop3: // null when the task has no expected target patterns
+      task.expectedTargetPatterns && task.expectedTargetPatterns.length > 0
+        ? firstRelevantHit !== null && firstRelevantHit <= 3
+        : null,
+    bestExample,
+    bestExampleHit,
+    ...(typeof response.preflight?.nextAction === 'string' && { // optional fields are set only when present
+      nextAction: response.preflight.nextAction
+    }),
+    ...(Array.isArray(response.preflight?.warnings) &&
+      response.preflight.warnings.length > 0 && { warnings: response.preflight.warnings }),
+    ...(Array.isArray(response.preflight?.whatWouldHelp) &&
+      response.preflight.whatWouldHelp.length > 0 && {
+        whatWouldHelp: response.preflight.whatWouldHelp
+      })
+  };
+}
+
+async function runSearchPreflight(
+  task: EditPreflightTask,
+  rootPath: string
+): Promise<EditPreflightResponse> {
+  const project = createProjectState(rootPath);
+  project.indexState.status = 'ready'; // NOTE(review): presumably makes the handler treat the index as built — confirm
+
+  const response = await searchCodebaseHandle(
+    {
+      query: task.query,
+      intent: 'edit',
+      limit: task.limit ?? 5 // five results when the task sets no limit
+    },
+    {
+      indexState: project.indexState,
+      paths: project.paths,
+      rootPath: project.rootPath,
+      performIndexing: () => undefined // no-op indexing callback
+    }
+  );
+  const payload = response.content?.[0]?.text ?? '{}'; // a missing text payload is treated as an empty object
+  const parsed = JSON.parse(payload) as unknown; // throws if the payload is not valid JSON
+
+  if (typeof parsed === 'object' && parsed !== null) {
+    return parsed as EditPreflightResponse;
+  }
+
+  return {}; // non-object JSON (string, number, null) yields an empty response
+}
+
+export async function evaluateEditPreflightFixture({
+  fixture,
+  rootPath,
+  runner = runSearchPreflight // injectable so callers can supply their own responses
+}: EvaluateEditPreflightFixtureParams): Promise<EditPreflightSummary> {
+  const results: EditPreflightTaskResult[] = [];
+
+  for (const task of fixture.tasks) { // sequential: one awaited runner call per task, in fixture order
+    const response = await runner(task, rootPath);
+    results.push(evaluateTask(task, response));
+  }
+
+  return summarizeEditPreflightResults(results);
+}
+
+export function combineEditPreflightSummaries( // re-aggregates from per-task results, ignoring the input rates
+  summaries: EditPreflightSummary[]
+): EditPreflightSummary {
+  return summarizeEditPreflightResults(summaries.flatMap((summary) => summary.results));
+}
+
+function formatRate(value: number | null): string { // fraction -> whole-number percent string
+  if (value === null) {
+    return 'n/a';
+  }
+
+  return `${(value * 100).toFixed(0)}%`; // e.g. 0.5 -> "50%"
+}
+
+function formatHit(value: number | null): string {
+  return value === null ? 'n/a' : value.toFixed(2); // two decimals, or "n/a" when there is no value
+}
+
+export function formatEditPreflightReport({
+  codebaseLabel,
+  fixturePath,
+  summary
+}: FormatEditPreflightReportParams): string {
+  const lines: string[] = [];
+  const unsafeFalsePositives = summary.results.filter( // unsafe tasks that still reported ready
+    (result) => result.risk === 'unsafe' && result.ready
+  );
+  const safeMisses = summary.results.filter((result) => result.risk === 'safe' && !result.ready);
+
+  lines.push(`\n=== Edit Preflight Eval Report: ${codebaseLabel} ===`);
+  lines.push(`Fixture: ${fixturePath}`);
+  lines.push(`Tasks: ${summary.totalTasks} (${summary.safeTasks} safe, ${summary.unsafeTasks} unsafe)`);
+  lines.push(
+    `Top-target in top-3: ${summary.topTargetInTop3Count}/${summary.targetableTasks} (${formatRate(summary.topTargetInTop3Rate)})`
+  );
+  lines.push(`Average first relevant hit: ${formatHit(summary.averageFirstRelevantHit)}`);
+  lines.push(
+    `Best-example hit rate: ${summary.bestExampleHitCount}/${summary.bestExampleTasks} (${formatRate(summary.bestExampleHitRate)})`
+  );
+  lines.push(
+    `Safe-task ready rate: ${summary.safeTaskReadyCount}/${summary.safeTasks} (${formatRate(summary.safeTaskReadyRate)})`
+  );
+  lines.push(
+    `Unsafe-task abstain rate: ${summary.unsafeTaskAbstainCount}/${summary.unsafeTasks} (${formatRate(summary.unsafeTaskAbstainRate)})`
+  );
+  lines.push(
+    `Unsafe ready=true false-positive rate: ${summary.unsafeReadyFalsePositiveCount}/${summary.unsafeTasks} (${formatRate(summary.unsafeReadyFalsePositiveRate)})`
+  );
+  lines.push('');
+  lines.push('Task results:');
+
+  for (const result of summary.results) { // one space-separated status line per task
+    const taskLine = [
+      `- ${result.taskId}`,
+      `[${result.risk}]`,
+      `ready=${result.ready ? 'yes' : 'no'}`,
+      `abstain=${result.abstain ? 'yes' : 'no'}`,
+      `firstRelevant=${result.firstRelevantHit ?? 'n/a'}`,
+      `top3=${result.topTargetInTop3 === null ? 'n/a' : result.topTargetInTop3 ? 'hit' : 'miss'}`,
+      `bestExample=${result.bestExampleHit === null ? 'n/a' : result.bestExampleHit ? 'hit' : 'miss'}`,
+      `quality=${result.searchQualityStatus}`
+    ];
+    lines.push(taskLine.join(' '));
+  }
+
+  lines.push('');
+  lines.push('Unsafe false positives:');
+  if (unsafeFalsePositives.length === 0) {
+    lines.push('  (none)');
+  } else {
+    for (const result of unsafeFalsePositives) {
+      lines.push(`  - ${result.taskId}: "${result.query}"`);
+    }
+  }
+
+  lines.push('');
+  lines.push('Safe misses:');
+  if (safeMisses.length === 0) {
+    lines.push('  (none)');
+  } else {
+    for (const result of safeMisses) {
+      lines.push(`  - ${result.taskId}: "${result.query}"`);
+      if (result.nextAction) { // the suggested next step is printed for safe misses only
+        lines.push(`    next: ${result.nextAction}`);
+      }
+    }
+  }
+
+  lines.push('================================');
+  return lines.join('\n');
+}
+
+export type { EditPreflightRunner };
diff --git a/src/eval/run-config.ts b/src/eval/run-config.ts
new file mode 100644
index 0000000..3484a2f
--- /dev/null
+++ b/src/eval/run-config.ts
@@ -0,0 +1,37 @@
+import path from 'path';
+
+export type EvalMode = 'retrieval' | 'discovery' | 'edit-preflight';
+
+export interface EvalFixtureDefaults {
+  fixtureA: string;
+  fixtureB: string;
+}
+
+export function resolveEvalMode(rawMode: string | undefined): EvalMode {
+  if (rawMode === 'discovery' || rawMode === 'edit-preflight') {
+    return rawMode;
+  }
+
+  return 'retrieval'; // undefined or unrecognized values fall back to retrieval
+}
+
+export function getDefaultFixturePaths(projectRoot: string, mode: EvalMode): EvalFixtureDefaults {
+  if (mode === 'discovery') {
+    return {
+      fixtureA: path.join(projectRoot, 'tests', 'fixtures', 'discovery-angular-spotify.json'),
+      fixtureB: path.join(projectRoot, 'tests', 'fixtures', 'discovery-excalidraw.json')
+    };
+  }
+
+  if (mode === 'edit-preflight') {
+    return {
+      fixtureA: path.join(projectRoot, 'tests', 'fixtures', 'edit-preflight-angular-spotify.json'),
+      fixtureB: path.join(projectRoot, 'tests', 'fixtures', 'edit-preflight-excalidraw.json')
+    };
+  }
+
+  return { // remaining mode: retrieval
+    fixtureA: path.join(projectRoot, 'tests', 'fixtures', 'eval-angular-spotify.json'),
+    fixtureB: path.join(projectRoot, 'tests', 'fixtures', 'eval-controlled.json')
+  };
+}
diff --git a/src/eval/types.ts b/src/eval/types.ts
index a4ced39..d80e777 100644
--- a/src/eval/types.ts
+++ b/src/eval/types.ts
@@ -64,6 +64,102 @@ export interface FormatEvalReportParams {
   redactPaths?: boolean;
 }
 
+export type EditPreflightRisk = 'safe' | 'unsafe';
+
+export interface EditPreflightTask {
+  id: string;
+  title: string;
+  query: string;
+  risk: EditPreflightRisk;
+  expectedTargetPatterns?: string[];
+  expectedBestExamplePatterns?: string[];
+  limit?: number;
+  notes?: string;
+}
+
+export interface EditPreflightFixture {
+  description?: string;
+  codebase?: string;
+  repository?: string;
+  repositoryUrl?: string;
+  repositoryRef?: string;
+  frozenDate?: string;
+  notes?: string;
+  tasks: EditPreflightTask[];
+}
+
+export interface EditPreflightTaskResult {
+  taskId: string;
+  title: string;
+  query: string;
+  risk: EditPreflightRisk;
+  ready: boolean;
+  abstain: boolean;
+  searchQualityStatus: 'ok' | 'low_confidence' | 'unknown';
+  topFiles: string[];
+  firstRelevantHit: number | null;
+  topTargetInTop3: boolean | null;
+  bestExample: string | null;
+  bestExampleHit: boolean | null;
+  nextAction?: string;
+  warnings?: string[];
+  whatWouldHelp?: string[];
+}
+
+export interface EditPreflightSummary {
+  totalTasks: number;
+  safeTasks: number;
+  unsafeTasks: number;
+  targetableTasks: number;
+  bestExampleTasks: number;
+  topTargetInTop3Count: number;
+  topTargetInTop3Rate: number | null;
+  averageFirstRelevantHit: number | null;
+  bestExampleHitCount: number;
+  bestExampleHitRate: number | null;
+  safeTaskReadyCount: number;
+  safeTaskReadyRate: number | null;
+  unsafeTaskAbstainCount: number;
+  unsafeTaskAbstainRate: number | null;
+  unsafeReadyFalsePositiveCount: number;
+  unsafeReadyFalsePositiveRate: number | null;
+  results: EditPreflightTaskResult[];
+}
+
+export interface EvaluateEditPreflightFixtureParams {
+  fixture: EditPreflightFixture;
+  rootPath: string;
+  runner?: EditPreflightRunner;
+}
+
+export interface FormatEditPreflightReportParams {
+  codebaseLabel: string;
+  fixturePath: string;
+  summary: EditPreflightSummary;
+}
+
+export interface EditPreflightResponse {
+  preflight?: {
+    ready?: boolean;
+    abstain?: boolean;
+    bestExample?: string;
+    nextAction?: string;
+    warnings?: string[];
+    whatWouldHelp?: string[];
+  };
+  searchQuality?: {
+    status?: 'ok' | 'low_confidence';
+  };
+  results?: Array<{
+    file?: string;
+  }>;
+}
+
+export type EditPreflightRunner = (
+  task: EditPreflightTask,
+  rootPath: string
+) => Promise<EditPreflightResponse>;
+
 export type DiscoveryJob = 'map' | 'find' | 'search';
 
 export type DiscoverySurface =
diff --git a/tests/benchmark-comparators.test.ts b/tests/benchmark-comparators.test.ts
index 8863ef4..e10cbd4 100644
--- a/tests/benchmark-comparators.test.ts
+++ b/tests/benchmark-comparators.test.ts
@@ -8,6 +8,10 @@ async function importHelper() {
   return import(pathToFileURL(path.resolve(__dirname, '..', 'scripts', 'lib', 'managed-mcp-session.mjs')).href);
 }
 
+async function importRunner() {
+  return import(pathToFileURL(path.resolve(__dirname, '..', 'scripts', 'benchmark-comparators.mjs')).href);
+}
+
 function isProcessAlive(pid: number): boolean {
   try {
     process.kill(pid, 0);
@@ -90,3 +94,98 @@ describe('managed MCP benchmark sessions', () => {
     await waitForProcessExit(pid as number);
   });
 });
+
+describe('benchmark comparator aggregation', () => {
+  it('marks empty task payloads as pending evidence instead of ok', async () => {
+    const { aggregateResults } = await importRunner();
+    const aggregated = aggregateResults([
+      {
+        taskId: 't1',
+        job: 'search',
+        surface: 'search_codebase',
+        usefulnessScore: 0,
+        matchedSignals: [],
+        missingSignals: ['results'],
+        payloadBytes: 19,
+        estimatedTokens: 5,
+        toolCallCount: 1,
+        elapsedMs: 1
+      }
+    ]);
+
+    expect(aggregated.status).toBe('pending_evidence');
+    expect(aggregated.reason).toMatch(/usable benchmark evidence/i);
+    expect(aggregated.averageFirstRelevantHit).toBeNull();
+    expect(aggregated.bestExampleUsefulnessRate).toBeNull();
+  });
+
+  it('computes ranked-hit and best-example metrics when task evidence exists', async () => {
+    const { aggregateResults } = await importRunner();
+    const aggregated = aggregateResults([
+      {
+        taskId: 'search-1',
+        job: 'search',
+        surface: 'search_codebase',
+        usefulnessScore: 0.5,
+        matchedSignals: ['results'],
+        missingSignals: ['searchQuality'],
+        payloadBytes: 200,
+        estimatedTokens: 50,
+        toolCallCount: 1,
+        elapsedMs: 10,
+        firstRelevantHit: 2
+      },
+      {
+        taskId: 'find-1',
+        job: 'find',
+        surface: 'search_codebase',
+        usefulnessScore: 1,
+        matchedSignals: ['bestExample'],
+        missingSignals: [],
+        payloadBytes: 220,
+        estimatedTokens: 55,
+        toolCallCount: 1,
+        elapsedMs: 12,
+        bestExampleUseful: true
+      }
+    ]);
+
+    expect(aggregated.status).toBe('ok');
+    expect(aggregated.averageFirstRelevantHit).toBe(2);
+    expect(aggregated.bestExampleUsefulnessRate).toBe(1);
+  });
+});
+
+describe('raw Claude result parsing', () => {
+  it('extracts files and bestExample from structured Claude output', async () => {
+    const { parseRawClaudeStructuredResult } = await importRunner();
+    const parsed = parseRawClaudeStructuredResult(
+      JSON.stringify({
+        answer: 'Use AuthInterceptor and auth.effects patterns.',
+        files: ['src/auth/auth.interceptor.ts', 'src/auth/auth.effects.ts'],
+        bestExample: 'src/auth/auth.interceptor.ts'
+      })
+    );
+
+    expect(parsed.payload).toContain('AuthInterceptor');
+    expect(parsed.topFiles).toEqual([
+      'src/auth/auth.interceptor.ts',
+      'src/auth/auth.effects.ts'
+    ]);
+    expect(parsed.bestExample).toBe('src/auth/auth.interceptor.ts');
+  });
+
+  it('extracts files and bestExample from fenced JSON Claude output', async () => {
+    const { parseRawClaudeStructuredResult } = await importRunner();
+    const parsed = parseRawClaudeStructuredResult(`\`\`\`json
+{"answer":"Use AuthInterceptor and auth.effects patterns.","files":["src/auth/auth.interceptor.ts","src/auth/auth.effects.ts"],"bestExample":"src/auth/auth.interceptor.ts"}
+\`\`\``);
+
+    expect(parsed.payload).toContain('AuthInterceptor');
+    expect(parsed.topFiles).toEqual([
+      'src/auth/auth.interceptor.ts',
+      'src/auth/auth.effects.ts'
+    ]);
+    expect(parsed.bestExample).toBe('src/auth/auth.interceptor.ts');
+  });
+});
diff --git a/tests/edit-preflight-harness.test.ts b/tests/edit-preflight-harness.test.ts
new file mode 100644
index 0000000..6332d15
--- /dev/null
+++ b/tests/edit-preflight-harness.test.ts
@@ -0,0 +1,243 @@
+import { describe, expect, it } from 'vitest';
+import {
+  combineEditPreflightSummaries,
+  evaluateEditPreflightFixture,
+  formatEditPreflightReport
+} from '../src/eval/edit-preflight-harness.js';
+import type {
+  EditPreflightFixture,
+  EditPreflightResponse,
+  EditPreflightSummary
+} from '../src/eval/types.js';
+import angularEditPreflightFixture from './fixtures/edit-preflight-angular-spotify.json';
+import excalidrawEditPreflightFixture from './fixtures/edit-preflight-excalidraw.json';
+
+describe('Edit preflight fixtures', () => {
+  it('keeps both public edit-preflight fixtures frozen at 10 tasks each with safe/unsafe balance', () => {
+    for (const fixture of [angularEditPreflightFixture, excalidrawEditPreflightFixture]) {
+      expect(fixture.tasks).toHaveLength(10);
+      const counts = fixture.tasks.reduce<Record<string, number>>((acc, task) => {
+        acc[task.risk] = (acc[task.risk] ?? 0) + 1;
+        return acc;
+      }, {});
+      expect(counts.safe).toBe(6);
+      expect(counts.unsafe).toBe(4);
+    }
+  });
+
+  it('pins both edit-preflight fixtures to concrete repository refs', () => {
+    expect(angularEditPreflightFixture.repositoryRef).toMatch(/^[0-9a-f]{40}$/);
+    expect(excalidrawEditPreflightFixture.repositoryRef).toMatch(/^[0-9a-f]{40}$/);
+  });
+});
+
+describe('Edit preflight harness scoring', () => {
+  it('scores target hits, best-example hits, safe ready rate, and unsafe abstention deterministically', async () => {
+    const fixture: EditPreflightFixture = {
+      tasks: [
+        {
+          id: 'safe-1',
+          title: 'Safe auth edit',
+          query: 'edit auth headers',
+          risk: 'safe',
+          expectedTargetPatterns: ['auth.interceptor.ts'],
+          expectedBestExamplePatterns: ['auth.interceptor.ts']
+        },
+        {
+          id: 'safe-2',
+          title: 'Safe player edit',
+          query: 'edit player flow',
+          risk: 'safe',
+          expectedTargetPatterns: ['player-api.ts'],
+          expectedBestExamplePatterns: ['player-api.ts']
+        },
+        {
+          id: 'unsafe-1',
+          title: 'Unsafe migration',
+          query: 'rewrite everything',
+          risk: 'unsafe'
+        }
+      ]
+    };
+
+    const responses: Record<string, EditPreflightResponse> = {
+      'edit auth headers': {
+        preflight: {
+          ready: true,
+          bestExample: 'src/http/auth.interceptor.ts'
+        },
+        searchQuality: { status: 'ok' },
+        results: [
+          { file: 'src/http/auth.interceptor.ts:1-20' },
+          { file: 'src/http/error.interceptor.ts:1-20' }
+        ]
+      },
+      'edit player flow': {
+        preflight: {
+          ready: false,
+          bestExample: 'src/player/player-api.ts',
+          nextAction: 'Search for callers before editing.'
+        },
+        searchQuality: { status: 'ok' },
+        results: [
+          { file: 'src/player/player-helper.ts:1-20' },
+          { file: 'src/player/player-api.ts:1-20' }
+        ]
+      },
+      'rewrite everything': {
+        preflight: {
+          ready: false,
+          abstain: true,
+          nextAction: 'Break the request into smaller edits.'
+        },
+        searchQuality: { status: 'low_confidence' },
+        results: [{ file: 'src/app/app.ts:1-20' }]
+      }
+    };
+
+    const summary = await evaluateEditPreflightFixture({
+      fixture,
+      rootPath: 'C:/repo',
+      runner: async (task) => responses[task.query] ?? {}
+    });
+
+    expect(summary.totalTasks).toBe(3);
+    expect(summary.topTargetInTop3Count).toBe(2);
+    expect(summary.topTargetInTop3Rate).toBe(1);
+    expect(summary.averageFirstRelevantHit).toBe(1.5);
+    expect(summary.bestExampleHitRate).toBe(1);
+    expect(summary.safeTaskReadyRate).toBe(0.5);
+    expect(summary.unsafeTaskAbstainRate).toBe(1);
+    expect(summary.unsafeReadyFalsePositiveRate).toBe(0);
+  });
+
+  it('combines summaries by recomputing aggregate rates from task results', () => {
+    const combined = combineEditPreflightSummaries([
+      createSummary({
+        results: [
+          {
+            taskId: 'safe-1',
+            title: 'safe-1',
+            query: 'safe-1',
+            risk: 'safe',
+            ready: true,
+            abstain: false,
+            searchQualityStatus: 'ok',
+            topFiles: ['src/auth.ts'],
+            firstRelevantHit: 1,
+            topTargetInTop3: true,
+            bestExample: 'src/auth.ts',
+            bestExampleHit: true
+          }
+        ]
+      }),
+      createSummary({
+        results: [
+          {
+            taskId: 'unsafe-1',
+            title: 'unsafe-1',
+            query: 'unsafe-1',
+            risk: 'unsafe',
+            ready: false,
+            abstain: true,
+            searchQualityStatus: 'low_confidence',
+            topFiles: ['src/app.ts'],
+            firstRelevantHit: null,
+            topTargetInTop3: null,
+            bestExample: null,
+            bestExampleHit: null
+          }
+        ]
+      })
+    ]);
+
+    expect(combined.totalTasks).toBe(2);
+    expect(combined.safeTaskReadyRate).toBe(1);
+    expect(combined.unsafeTaskAbstainRate).toBe(1);
+    expect(combined.unsafeReadyFalsePositiveRate).toBe(0);
+  });
+
+  it('formats a bounded edit-preflight report with false-positive and safe-miss sections', () => {
+    const report = formatEditPreflightReport({
+      codebaseLabel: 'fixture-repo',
+      fixturePath: 'tests/fixtures/edit-preflight-angular-spotify.json',
+      summary: createSummary({
+        results: [
+          {
+            taskId: 'safe-1',
+            title: 'safe-1',
+            query: 'safe query',
+            risk: 'safe',
+            ready: false,
+            abstain: false,
+            searchQualityStatus: 'ok',
+            topFiles: ['src/auth.ts'],
+            firstRelevantHit: 2,
+            topTargetInTop3: true,
+            bestExample: 'src/auth.ts',
+            bestExampleHit: true,
+            nextAction: 'Search for callers first.'
+          },
+          {
+            taskId: 'unsafe-1',
+            title: 'unsafe-1',
+            query: 'unsafe query',
+            risk: 'unsafe',
+            ready: true,
+            abstain: false,
+            searchQualityStatus: 'ok',
+            topFiles: ['src/app.ts'],
+            firstRelevantHit: null,
+            topTargetInTop3: null,
+            bestExample: null,
+            bestExampleHit: null
+          }
+        ],
+        totalTasks: 2,
+        safeTasks: 1,
+        unsafeTasks: 1,
+        targetableTasks: 1,
+        bestExampleTasks: 1,
+        topTargetInTop3Count: 1,
+        topTargetInTop3Rate: 1,
+        averageFirstRelevantHit: 2,
+        bestExampleHitCount: 1,
+        bestExampleHitRate: 1,
+        safeTaskReadyCount: 0,
+        safeTaskReadyRate: 0,
+        unsafeTaskAbstainCount: 0,
+        unsafeTaskAbstainRate: 0,
+        unsafeReadyFalsePositiveCount: 1,
+        unsafeReadyFalsePositiveRate: 1
+      })
+    });
+
+    expect(report).toContain('Edit Preflight Eval Report');
+    expect(report).toContain('Unsafe false positives:');
+    expect(report).toContain('Safe misses:');
+    expect(report).toContain('next: Search for callers first.');
+  });
+});
+
+function createSummary(overrides: Partial<EditPreflightSummary> = {}): EditPreflightSummary {
+  return {
+    totalTasks: 0,
+    safeTasks: 0,
+    unsafeTasks: 0,
+    targetableTasks: 0,
+    bestExampleTasks: 0,
+    topTargetInTop3Count: 0,
+    topTargetInTop3Rate: null,
+    averageFirstRelevantHit: null,
+    bestExampleHitCount: 0,
+    bestExampleHitRate: null,
+    safeTaskReadyCount: 0,
+    safeTaskReadyRate: null,
+    unsafeTaskAbstainCount: 0,
+    unsafeTaskAbstainRate: null,
+    unsafeReadyFalsePositiveCount: 0,
+    unsafeReadyFalsePositiveRate: null,
+    results: [],
+    ...overrides
+  };
+}
diff --git a/tests/fixtures/README.md b/tests/fixtures/README.md
index 18d954c..073c71a 100644
--- a/tests/fixtures/README.md
+++ b/tests/fixtures/README.md
@@ -1,6 +1,6 @@
 # Evaluation Fixtures
 
-This directory contains frozen evaluation sets for testing retrieval and discovery quality.
+This directory contains frozen evaluation sets for testing retrieval, discovery, and edit-preflight quality.
 
 ## Files
 
@@ -8,6 +8,8 @@ This directory contains frozen evaluation sets for testing retrieval and discove
 - `eval-controlled.json` - 20 frozen retrieval queries for the in-repo controlled fixture codebase
 - `discovery-angular-spotify.json` - 12 discovery tasks for `angular-spotify`
 - `discovery-excalidraw.json` - 12 discovery tasks for `Excalidraw`
+- `edit-preflight-angular-spotify.json` - 10 edit-readiness tasks for `angular-spotify`
+- `edit-preflight-excalidraw.json` - 10 edit-readiness tasks for `Excalidraw`
 - `discovery-benchmark-protocol.json` - frozen scope, comparator set, fairness rules, and ship gate for the discovery benchmark
 
 ## Running Evaluations
@@ -42,6 +44,12 @@ node scripts/run-eval.mjs tests/fixtures/codebases/eval-controlled --mode retrie
 node scripts/run-eval.mjs /path/to/angular-spotify /path/to/excalidraw --mode discovery
 ```
 
+### Run Edit-Preflight Evaluation
+
+```bash
+node scripts/run-eval.mjs /path/to/angular-spotify /path/to/excalidraw --mode edit-preflight
+```
+
 Optional comparator evidence file:
 
 ```bash
@@ -66,6 +74,15 @@ The discovery harness outputs:
 - **Average first relevant hit**: position of the first relevant file for search tasks
 - **Best-example usefulness**: whether find tasks surfaced the expected exemplar
 
+The edit-preflight harness outputs:
+
+- **Top-target in top-3**: how often the expected edit surface appears within the first three results (tasks without expected target patterns are excluded)
+- **Average first relevant hit**: average 1-based ranking position of the first expected edit surface, over tasks where one was found
+- **Best-example hit rate**: how often preflight `bestExample` matches the expected local exemplar
+- **Safe-task ready rate**: how often concrete local edits return `ready=true`
+- **Unsafe-task abstain rate**: how often broad or migration-scale asks return `abstain=true`
+- **Unsafe `ready=true` false-positive rate**: how often unsafe asks are incorrectly marked ready
+
 ## Evaluation Integrity Rules
 
 ⚠️ **CRITICAL**: These fixtures are FROZEN. Once committed:
@@ -81,6 +98,11 @@ For discovery specifically:
 6. **DO NOT** claim implementation quality from this benchmark
 7. **DO** keep comparator setup limitations explicit when a lane requires manual log capture
 
+For edit-preflight specifically:
+
+8. **DO NOT** convert these tasks into patch-quality or autonomous-edit claims
+9. **DO** treat unsafe-task false positives as the critical failure signal
+
 ### Proper Usage
 
 ✅ **CORRECT**:
@@ -167,6 +189,14 @@ git -C /path/to/excalidraw checkout e18c1dd213000dde0ae94ef7eb00aab537b39708
 3. Run eval on both pinned repos
 4. Compare metrics transparently
 
+### Edit-Preflight Scope
+
+Edit-preflight mode is intentionally non-comparator and launch-readiness oriented:
+
+1. It only evaluates the shipped `search_codebase` edit preflight
+2. It measures navigation/readiness signals, not code generation quality
+3. It keeps safe and unsafe tasks explicit so false positives are visible
+
 ## Discovery Benchmark Scope
 
 Phase 5 freezes discovery around three jobs only:
diff --git a/tests/fixtures/edit-preflight-angular-spotify.json b/tests/fixtures/edit-preflight-angular-spotify.json
new file mode 100644
index 0000000..8004e22
--- /dev/null
+++ b/tests/fixtures/edit-preflight-angular-spotify.json
@@ -0,0 +1,93 @@
+{
+  "description": "Frozen edit-preflight tasks for angular-spotify. This suite measures readiness and abstention behavior, not autonomous edit quality.",
+  "codebase": "angular-spotify",
+  "repository": "trungk18/angular-spotify",
+  "repositoryUrl": "https://github.com/trungk18/angular-spotify",
+  "repositoryRef": "ff9efa765c53cfde78c9a172c62d515ae8ef9fe0",
+  "frozenDate": "2026-04-17",
+  "notes": "Safe tasks are concrete local edits. Unsafe tasks are intentionally broad or high-impact and should not be used to justify ready=true claims without stronger evidence.",
+  "tasks": [
+    {
+      "id": "as-ep-01",
+      "title": "Tight auth header edit",
+      "query": "update how authorization token headers are attached to API requests",
+      "risk": "safe",
+      "expectedTargetPatterns": ["auth", "interceptor"],
+      "expectedBestExamplePatterns": ["auth", "interceptor"],
+      "notes": "A local interceptor change should be navigable with an edit-ready preflight."
+    },
+    {
+      "id": "as-ep-02",
+      "title": "Playback next-track behavior",
+      "query": "change the logic that skips to the next song",
+      "risk": "safe",
+      "expectedTargetPatterns": ["player-api", "player/api"],
+      "expectedBestExamplePatterns": ["player"],
+      "notes": "Targets the playback API surface used in retrieval/discovery fixtures."
+    },
+    {
+      "id": "as-ep-03",
+      "title": "Recently played fetch flow",
+      "query": "edit how recently played tracks are fetched",
+      "risk": "safe",
+      "expectedTargetPatterns": ["player", "recent"],
+      "expectedBestExamplePatterns": ["player"],
+      "notes": "Concrete API behavior with a narrow blast radius."
+    },
+    {
+      "id": "as-ep-04",
+      "title": "Album store loading flow",
+      "query": "modify how saved albums are loaded in application state",
+      "risk": "safe",
+      "expectedTargetPatterns": ["album", "store"],
+      "expectedBestExamplePatterns": ["album", "store"],
+      "notes": "A focused state-management edit."
+    },
+    {
+      "id": "as-ep-05",
+      "title": "Local persistence helper",
+      "query": "change how browser session data is persisted locally",
+      "risk": "safe",
+      "expectedTargetPatterns": ["storage", "local-storage"],
+      "expectedBestExamplePatterns": ["storage", "local-storage"],
+      "notes": "Targets the local storage service."
+    },
+    {
+      "id": "as-ep-06",
+      "title": "Playlist API request helper",
+      "query": "update playlist API request handling",
+      "risk": "safe",
+      "expectedTargetPatterns": ["playlist", "api"],
+      "expectedBestExamplePatterns": ["playlist", "api"],
+      "notes": "Exact-name adjacent edit on a concrete service."
+    },
+    {
+      "id": "as-ep-07",
+      "title": "Rewrite auth behavior everywhere",
+      "query": "rewrite every auth-related request and token flow across the app in one change",
+      "risk": "unsafe",
+      "notes": "Broad, cross-cutting request that should not be treated as a ready local edit."
+    },
+    {
+      "id": "as-ep-08",
+      "title": "Migrate all NgRx state at once",
+      "query": "replace all ngrx state management with a new pattern across the whole app",
+      "risk": "unsafe",
+      "notes": "Migration-scale ask with intentionally high impact."
+    },
+    {
+      "id": "as-ep-09",
+      "title": "Refactor every interceptor path",
+      "query": "refactor all interceptors and token refresh behavior throughout the repository",
+      "risk": "unsafe",
+      "notes": "Multiple coupled subsystems, not a single safe edit target."
+    },
+    {
+      "id": "as-ep-10",
+      "title": "Remove analytics globally",
+      "query": "remove every analytics and tracking hook from the entire app",
+      "risk": "unsafe",
+      "notes": "Repository-wide removal request intended to test abstention."
+    }
+  ]
+}
diff --git a/tests/fixtures/edit-preflight-excalidraw.json b/tests/fixtures/edit-preflight-excalidraw.json
new file mode 100644
index 0000000..1a45a27
--- /dev/null
+++ b/tests/fixtures/edit-preflight-excalidraw.json
@@ -0,0 +1,93 @@
+{
+  "description": "Frozen edit-preflight tasks for Excalidraw. This suite measures whether the current preflight finds the right edit surface and abstains on unsafe asks.",
+  "codebase": "Excalidraw",
+  "repository": "excalidraw/excalidraw",
+  "repositoryUrl": "https://github.com/excalidraw/excalidraw",
+  "repositoryRef": "e18c1dd213000dde0ae94ef7eb00aab537b39708",
+  "frozenDate": "2026-04-17",
+  "notes": "Safe tasks stay local to a scene, element, serialization, or app-state surface. Unsafe tasks intentionally span multiple subsystems or migration-scale edits.",
+  "tasks": [
+    {
+      "id": "ex-ep-01",
+      "title": "Scene update flow",
+      "query": "change how scene updates are applied",
+      "risk": "safe",
+      "expectedTargetPatterns": ["scene"],
+      "expectedBestExamplePatterns": ["scene"],
+      "notes": "Focused scene-edit behavior used in current discovery coverage."
+    },
+    {
+      "id": "ex-ep-02",
+      "title": "Element type definitions",
+      "query": "edit element type definitions",
+      "risk": "safe",
+      "expectedTargetPatterns": ["element", "type"],
+      "expectedBestExamplePatterns": ["element", "type"],
+      "notes": "Concrete type-oriented edit surface."
+    },
+    {
+      "id": "ex-ep-03",
+      "title": "Scene JSON serialization",
+      "query": "modify scene serialization to json export",
+      "risk": "safe",
+      "expectedTargetPatterns": ["scene", "json", "data"],
+      "expectedBestExamplePatterns": ["scene", "json", "data"],
+      "notes": "Narrow export/serialization edit."
+    },
+    {
+      "id": "ex-ep-04",
+      "title": "App state selection flow",
+      "query": "change app state selection and update logic",
+      "risk": "safe",
+      "expectedTargetPatterns": ["appstate", "state", "app"],
+      "expectedBestExamplePatterns": ["appstate", "state", "app"],
+      "notes": "Local app-state behavior."
+    },
+    {
+      "id": "ex-ep-05",
+      "title": "Canvas entry interaction",
+      "query": "edit the main canvas app entry behavior",
+      "risk": "safe",
+      "expectedTargetPatterns": ["app", "excalidraw", "canvas"],
+      "expectedBestExamplePatterns": ["app", "excalidraw", "canvas"],
+      "notes": "Concrete entry-surface edit."
+    },
+    {
+      "id": "ex-ep-06",
+      "title": "Element mutation helper",
+      "query": "change how elements are updated after scene edits",
+      "risk": "safe",
+      "expectedTargetPatterns": ["element", "scene"],
+      "expectedBestExamplePatterns": ["element", "scene"],
+      "notes": "Targets the local element mutation path without asking for repo-wide migration."
+    },
+    {
+      "id": "ex-ep-07",
+      "title": "Rewrite scene mutation architecture",
+      "query": "rewrite all scene mutation flows across the whole app in one pass",
+      "risk": "unsafe",
+      "notes": "Broad architectural request intended to trigger abstention."
+    },
+    {
+      "id": "ex-ep-08",
+      "title": "Replace state model globally",
+      "query": "migrate every app state update path to a new state architecture",
+      "risk": "unsafe",
+      "notes": "Migration-scale change across the repository."
+    },
+    {
+      "id": "ex-ep-09",
+      "title": "Refactor export and collaboration together",
+      "query": "change the entire export pipeline and collaboration serialization at once",
+      "risk": "unsafe",
+      "notes": "Coupled multi-subsystem change that should not look edit-ready from one search."
+    },
+    {
+      "id": "ex-ep-10",
+      "title": "Rename all element concepts",
+      "query": "rename every element type and related references across the repo",
+      "risk": "unsafe",
+      "notes": "Repository-wide rename intended to catch false positives where an unsafe ask is reported as ready=true."
+    }
+  ]
+}
diff --git a/tests/run-eval-config.test.ts b/tests/run-eval-config.test.ts
new file mode 100644
index 0000000..3667473
--- /dev/null
+++ b/tests/run-eval-config.test.ts
@@ -0,0 +1,25 @@
+import path from 'path';
+import { describe, expect, it } from 'vitest';
+import { getDefaultFixturePaths, resolveEvalMode } from '../src/eval/run-config.js';
+
+describe('run-eval mode config', () => {
+  it('recognizes edit-preflight as a first-class eval mode', () => {
+    for (const mode of ['edit-preflight', 'discovery', 'retrieval']) {
+      expect(resolveEvalMode(mode)).toBe(mode);
+    }
+  });
+
+  it('keeps retrieval as the fallback mode for unknown values', () => {
+    for (const input of ['unknown-mode', undefined]) {
+      expect(resolveEvalMode(input)).toBe('retrieval');
+    }
+  });
+
+  it('returns dedicated frozen default fixtures for edit-preflight mode', () => {
+    const fixturesDir = path.join('C:/repo', 'tests', 'fixtures');
+    expect(getDefaultFixturePaths('C:/repo', 'edit-preflight')).toEqual({
+      fixtureA: path.join(fixturesDir, 'edit-preflight-angular-spotify.json'),
+      fixtureB: path.join(fixturesDir, 'edit-preflight-excalidraw.json')
+    });
+  });
+});