EntityProcess · christso · Apr 6, 2026 · Apr 6, 2026 · Apr 6, 2026 · Apr 6, 2026
diff --git a/apps/cli/src/commands/eval/commands/run.ts b/apps/cli/src/commands/eval/commands/run.ts
@@ -192,6 +192,12 @@ export const evalRunCommand = command({
       long: 'exclude-tag',
       description: 'Skip eval files that have this tag (repeatable, file skipped if any match)',
     }),
+    transcript: option({
+      type: optional(string),
+      long: 'transcript',
+      description:
+        'Grade a pre-recorded transcript JSONL instead of invoking a live provider. Ignores targets.',
+    }),
   },
   handler: async (args) => {
     // Launch interactive wizard when no eval paths and stdin is a TTY
@@ -237,6 +243,7 @@ export const evalRunCommand = command({
       threshold: args.threshold,
       tag: args.tag,
       excludeTag: args.excludeTag,
+      transcript: args.transcript,
     };
     const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
     if (result?.allExecutionErrors) {

diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
@@ -90,6 +90,7 @@ interface NormalizedOptions {
   readonly threshold?: number;
   readonly tags: readonly string[];
   readonly excludeTags: readonly string[];
+  readonly transcript?: string;
 }
 
 function normalizeBoolean(value: unknown): boolean {
@@ -357,6 +358,7 @@ function normalizeOptions(
     threshold: normalizeOptionalNumber(rawOptions.threshold),
     tags: normalizeStringArray(rawOptions.tag),
     excludeTags: normalizeStringArray(rawOptions.excludeTag),
+    transcript: normalizeString(rawOptions.transcript),
   } satisfies NormalizedOptions;
 }
 
@@ -507,63 +509,86 @@ async function prepareFileMetadata(params: {
     category,
   });
   const testIds = suite.tests.map((value) => value.id);
-
-  // Determine target names: CLI --target flags override YAML
-  const cliTargets = options.cliTargets;
   const suiteTargets = suite.targets;
 
-  // Resolve which target names to use (precedence: CLI > suite YAML targets > default)
-  let targetNames: readonly string[];
-  if (cliTargets.length > 0) {
-    targetNames = cliTargets;
-  } else if (suiteTargets && suiteTargets.length > 0) {
-    targetNames = suiteTargets;
-  } else {
-    targetNames = [];
-  }
-
   let selections: { selection: TargetSelection; inlineTargetLabel: string }[];
 
-  if (targetNames.length > 1) {
-    // Matrix mode: multiple targets
-    const multiSelections = await selectMultipleTargets({
-      testFilePath,
-      repoRoot,
-      cwd,
-      explicitTargetsPath: options.targetsPath,
-      dryRun: options.dryRun,
-      dryRunDelay: options.dryRunDelay,
-      dryRunDelayMin: options.dryRunDelayMin,
-      dryRunDelayMax: options.dryRunDelayMax,
-      env: process.env,
-      targetNames,
-    });
-
-    selections = multiSelections.map((sel) => ({
-      selection: sel,
-      inlineTargetLabel: sel.targetName,
-    }));
-  } else {
-    // Single target mode (legacy path)
-    const selection = await selectTarget({
-      testFilePath,
-      repoRoot,
-      cwd,
-      explicitTargetsPath: options.targetsPath,
-      cliTargetName: targetNames.length === 1 ? targetNames[0] : options.target,
-      dryRun: options.dryRun,
-      dryRunDelay: options.dryRunDelay,
-      dryRunDelayMin: options.dryRunDelayMin,
-      dryRunDelayMax: options.dryRunDelayMax,
-      env: process.env,
-    });
-
+  if (options.transcript) {
+    // --transcript mode: bypass target resolution entirely.
+    // Create a synthetic TargetSelection for the transcript provider.
+    const transcriptSelection: TargetSelection = {
+      definitions: [],
+      resolvedTarget: {
+        kind: 'transcript',
+        name: 'transcript',
+        config: {} as Record<string, never>,
+      },
+      targetName: 'transcript',
+      targetSource: 'cli',
+      targetsFilePath: options.transcript,
+    };
     selections = [
       {
-        selection,
-        inlineTargetLabel: selection.targetName,
+        selection: transcriptSelection,
+        inlineTargetLabel: `transcript (${path.basename(options.transcript)})`,
       },
     ];
+  } else {
+    // Determine target names: CLI --target flags override YAML
+    const cliTargets = options.cliTargets;
+    const suiteTargets = suite.targets;
+
+    // Resolve which target names to use (precedence: CLI > suite YAML targets > default)
+    let targetNames: readonly string[];
+    if (cliTargets.length > 0) {
+      targetNames = cliTargets;
+    } else if (suiteTargets && suiteTargets.length > 0) {
+      targetNames = suiteTargets;
+    } else {
+      targetNames = [];
+    }
+
+    if (targetNames.length > 1) {
+      // Matrix mode: multiple targets
+      const multiSelections = await selectMultipleTargets({
+        testFilePath,
+        repoRoot,
+        cwd,
+        explicitTargetsPath: options.targetsPath,
+        dryRun: options.dryRun,
+        dryRunDelay: options.dryRunDelay,
+        dryRunDelayMin: options.dryRunDelayMin,
+        dryRunDelayMax: options.dryRunDelayMax,
+        env: process.env,
+        targetNames,
+      });
+
+      selections = multiSelections.map((sel) => ({
+        selection: sel,
+        inlineTargetLabel: sel.targetName,
+      }));
+    } else {
+      // Single target mode (legacy path)
+      const selection = await selectTarget({
+        testFilePath,
+        repoRoot,
+        cwd,
+        explicitTargetsPath: options.targetsPath,
+        cliTargetName: targetNames.length === 1 ? targetNames[0] : options.target,
+        dryRun: options.dryRun,
+        dryRunDelay: options.dryRunDelay,
+        dryRunDelayMin: options.dryRunDelayMin,
+        dryRunDelayMax: options.dryRunDelayMax,
+        env: process.env,
+      });
+
+      selections = [
+        {
+          selection,
+          inlineTargetLabel: selection.targetName,
+        },
+      ];
+    }
   }
 
   return {
@@ -623,6 +648,9 @@ async function runSingleEvalFile(params: {
   readonly totalBudgetUsd?: number;
   readonly failOnError?: FailOnError;
   readonly threshold?: number;
+  readonly providerFactory?: (
+    target: import('@agentv/core').ResolvedTarget,
+  ) => import('@agentv/core').Provider;
 }): Promise<{ results: EvaluationResult[] }> {
   const {
     testFilePath,
@@ -645,6 +673,7 @@ async function runSingleEvalFile(params: {
     matrixMode,
     totalBudgetUsd,
     failOnError,
+    providerFactory,
   } = params;
 
   const targetName = selection.targetName;
@@ -742,6 +771,7 @@ async function runSingleEvalFile(params: {
     graderTarget: options.graderTarget,
     model: options.model,
     threshold: options.threshold,
+    providerFactory,
     streamCallbacks: streamingObserver?.getStreamCallbacks(),
     onResult: async (result: EvaluationResult) => {
       (
@@ -1198,6 +1228,31 @@ export async function runEvalCommand(
   // Use only files that survived tag filtering (fileMetadata keys)
   const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
 
+  // --transcript: create a shared TranscriptProvider and validate line count
+  let transcriptProviderFactory:
+    | ((target: import('@agentv/core').ResolvedTarget) => import('@agentv/core').Provider)
+    | undefined;
+  if (options.transcript) {
+    const { TranscriptProvider } = await import('@agentv/core');
+    const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
+
+    // Validate: transcript lines must match total test cases across all files
+    const totalTests = [...fileMetadata.values()].reduce(
+      (sum, meta) => sum + meta.testCases.length,
+      0,
+    );
+    if (transcriptProvider.lineCount !== totalTests) {
+      throw new Error(
+        `Transcript has ${transcriptProvider.lineCount} entry(s) but eval defines ${totalTests} test(s). Each transcript line maps positionally to one test case.`,
+      );
+    }
+
+    transcriptProviderFactory = () => transcriptProvider;
+    console.log(
+      `Using transcript: ${options.transcript} (${transcriptProvider.lineCount} entry(s))`,
+    );
+  }
+
   try {
     await runWithLimit(activeTestFiles, fileConcurrency, async (testFilePath) => {
       const targetPrep = fileMetadata.get(testFilePath);
@@ -1242,11 +1297,12 @@ export async function runEvalCommand(
               selection,
               inlineTargetLabel,
               testCases: applicableTestCases,
-              trialsConfig: targetPrep.trialsConfig,
+              trialsConfig: options.transcript ? undefined : targetPrep.trialsConfig,
               matrixMode: targetPrep.selections.length > 1,
               totalBudgetUsd: targetPrep.totalBudgetUsd,
               failOnError: targetPrep.failOnError,
               threshold: resolvedThreshold,
+              providerFactory: transcriptProviderFactory,
             });
 
             return result.results;

diff --git a/apps/cli/src/commands/import/claude.ts b/apps/cli/src/commands/import/claude.ts
@@ -1,6 +1,11 @@
 import { mkdir, writeFile } from 'node:fs/promises';
 import path from 'node:path';
-import { discoverClaudeSessions, parseClaudeSession, readTranscriptFile } from '@agentv/core';
+import {
+  discoverClaudeSessions,
+  parseClaudeSession,
+  readTranscriptFile,
+  toTranscriptJsonLine,
+} from '@agentv/core';
 import { command, flag, option, optional, string } from 'cmd-ts';
 
 export const importClaudeCommand = command({
@@ -106,9 +111,9 @@ export const importClaudeCommand = command({
     // Ensure output directory exists
     await mkdir(path.dirname(outputPath), { recursive: true });
 
-    // Write transcript as JSONL (one message per line)
-    const outputLines = transcript.messages.map((msg) => JSON.stringify(msg));
-    await writeFile(outputPath, `${outputLines.join('\n')}\n`, 'utf8');
+    // Write transcript as JSONL (one line per test case, snake_case wire format)
+    const jsonLine = toTranscriptJsonLine(transcript);
+    await writeFile(outputPath, `${JSON.stringify(jsonLine)}\n`, 'utf8');
 
     const msgCount = transcript.messages.length;
     const toolCount = transcript.messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0);

diff --git a/apps/cli/src/commands/import/codex.ts b/apps/cli/src/commands/import/codex.ts
@@ -0,0 +1,127 @@
+import { mkdir, writeFile } from 'node:fs/promises';
+import path from 'node:path';
+import {
+  discoverCodexSessions,
+  parseCodexSession,
+  readTranscriptFile,
+  toTranscriptJsonLine,
+} from '@agentv/core';
+import { command, flag, option, optional, string } from 'cmd-ts';
+
+/**
+ * `import codex` subcommand: lists discovered Codex CLI sessions (`--list`) or converts the
+ * latest one (`--discover latest`) into a single-line transcript JSONL file for offline grading.
+ */
+export const importCodexCommand = command({
+  name: 'codex',
+  description: 'Import a Codex CLI session transcript for offline grading',
+  args: {
+    discover: option({
+      type: optional(string),
+      long: 'discover',
+      description: 'Discovery mode: "latest" to import the most recent session',
+    }),
+    date: option({
+      type: optional(string),
+      long: 'date',
+      description: 'Filter sessions by date (YYYY-MM-DD)',
+    }),
+    output: option({
+      type: optional(string),
+      long: 'output',
+      short: 'o',
+      description:
+        'Output file path (default: .agentv/transcripts/codex-<session-id-prefix>.jsonl)',
+    }),
+    sessionsDir: option({
+      type: optional(string),
+      long: 'sessions-dir',
+      description: 'Override the default ~/.codex/sessions directory',
+    }),
+    list: flag({
+      long: 'list',
+      description: 'List available sessions instead of importing',
+    }),
+  },
+  handler: async ({ discover, date, output, sessionsDir, list }) => {
+    // --list is checked first and returns early, so it wins over --discover when both are given.
+    if (list) {
+      const sessions = await discoverCodexSessions({ date, sessionsDir, limit: 20 });
+
+      if (sessions.length === 0) {
+        console.log('No Codex CLI sessions found.');
+        return;
+      }
+
+      console.log(`Found ${sessions.length} session(s):\n`);
+      for (const session of sessions) {
+        const age = formatAge(session.updatedAt);
+        console.log(`  ${session.sessionId}  ${age}  ${session.filename}`);
+      }
+      return;
+    }
+
+    // Importing only supports the "latest" discovery mode; anything else exits with status 1.
+    if (discover !== 'latest') {
+      console.error('Error: specify --discover latest to select a session.');
+      process.exit(1);
+    }
+
+    const sessions = await discoverCodexSessions({ date, sessionsDir, latest: true });
+
+    if (sessions.length === 0) {
+      console.error('Error: no Codex CLI sessions found.');
+      process.exit(1);
+    }
+
+    const session = sessions[0];
+    console.log(`Discovered latest session: ${session.filename}`);
+
+    // Parse the session
+    const rawJsonl = await readTranscriptFile(session.filePath);
+    const transcript = parseCodexSession(rawJsonl);
+
+    // Determine output path: an explicit --output wins; otherwise the default file name is
+    // built from the first 8 characters of the session id (not a timestamp).
+    const shortId = session.sessionId.slice(0, 8);
+    const outputPath = output ?? path.join('.agentv', 'transcripts', `codex-${shortId}.jsonl`);
+
+    // Ensure output directory exists
+    await mkdir(path.dirname(outputPath), { recursive: true });
+
+    // Write transcript as JSONL (snake_case wire format)
+    const jsonLine = toTranscriptJsonLine(transcript);
+    await writeFile(outputPath, `${JSON.stringify(jsonLine)}\n`, 'utf8');
+
+    const msgCount = transcript.messages.length;
+    const toolCount = transcript.messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0);
+
+    console.log(`Imported ${msgCount} messages (${toolCount} tool calls) → ${outputPath}`);
+
+    if (transcript.source.model) {
+      console.log(`  Model: ${transcript.source.model}`);
+    }
+    if (transcript.durationMs !== undefined) {
+      console.log(`  Duration: ${formatDurationMs(transcript.durationMs)}`);
+    }
+  },
+});
+
+/** Describes how long ago `date` was as a coarse `Nm ago` / `Nh ago` / `Nd ago` label. */
+function formatAge(date: Date): string {
+  // Clamp at zero so a timestamp in the future (clock skew) never prints a negative age.
+  const elapsedMs = Math.max(0, Date.now() - date.getTime());
+  const minutes = Math.floor(elapsedMs / 60_000);
+  if (minutes < 60) return `${minutes}m ago`;
+  const hours = Math.floor(minutes / 60);
+  return hours < 24 ? `${hours}h ago` : `${Math.floor(hours / 24)}d ago`;
+}
+
+/** Renders a millisecond duration as `Nms`, `Ns`, or `Nm Ns`; seconds and minutes round down. */
+function formatDurationMs(ms: number): string {
+  if (ms < 1000) return `${ms}ms`; // sub-second values are printed as given
+  const totalSeconds = Math.floor(ms / 1000);
+  const wholeMinutes = Math.floor(totalSeconds / 60);
+  if (wholeMinutes === 0) return `${totalSeconds}s`;
+  return `${wholeMinutes}m ${totalSeconds % 60}s`;
+}