diff --git a/apps/cli/src/commands/results/remote.ts b/apps/cli/src/commands/results/remote.ts
index 2fcc4a7e..59c0af1e 100644
--- a/apps/cli/src/commands/results/remote.ts
+++ b/apps/cli/src/commands/results/remote.ts
@@ -8,7 +8,9 @@ import {
   directPushResults,
   directorySizeBytes,
   getResultsRepoStatus,
+  listGitRuns,
   loadConfig,
+  normalizeResultsConfig,
   resolveResultsRepoRunsDir,
   syncResultsRepo,
 } from '@agentv/core';
@@ -59,15 +61,6 @@ function getStatusMessage(error: unknown): string {
   return error instanceof Error ? error.message : String(error);
 }
 
-function normalizeResultsConfig(config: ResultsConfig): Required<ResultsConfig> {
-  return {
-    repo: config.repo,
-    path: config.path,
-    auto_push: config.auto_push === true,
-    branch_prefix: config.branch_prefix?.trim() || 'eval-results',
-  };
-}
-
 function statusForResult(result: EvaluationResult): 'PASS' | 'FAIL' | 'ERROR' {
   if (result.executionStatus === 'execution_error' || result.error) {
     return 'ERROR';
@@ -185,15 +178,45 @@ export async function listMergedResultFiles(
     };
   }
 
-  const remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
-    (meta) =>
-      ({
-        ...meta,
-        filename: encodeRemoteRunId(meta.filename),
-        raw_filename: meta.filename,
+  let remoteRuns: SourcedResultFileMeta[] = [];
+  if (config.mode === 'github') {
+    try {
+      const gitRuns = await listGitRuns(config.path);
+      remoteRuns = gitRuns.map((r) => ({
+        filename: encodeRemoteRunId(r.run_id),
+        raw_filename: r.run_id,
         source: 'remote' as const,
-      }) satisfies SourcedResultFileMeta,
-  );
+        path: path.join(config.path, r.manifest_path),
+        displayName: r.display_name,
+        timestamp: r.timestamp,
+        testCount: r.test_count,
+        passRate: r.pass_rate || 0,
+        avgScore: r.avg_score || 0,
+        sizeBytes: r.size_bytes || 0,
+      }));
+    } catch (error) {
+      console.error('git-native listing failed, falling back', error);
+      remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
+        (meta) =>
+          ({
+            ...meta,
+            filename: encodeRemoteRunId(meta.filename),
+            raw_filename: meta.filename,
+            source: 'remote' as const,
+          }) satisfies SourcedResultFileMeta,
+      );
+    }
+  } else {
+    remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
+      (meta) =>
+        ({
+          ...meta,
+          filename: encodeRemoteRunId(meta.filename),
+          raw_filename: meta.filename,
+          source: 'remote' as const,
+        }) satisfies SourcedResultFileMeta,
+    );
+  }
 
   const merged = [...localRuns, ...remoteRuns].sort((a, b) =>
     b.timestamp.localeCompare(a.timestamp),
diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts
index 79ca87fc..5d94a45c 100644
--- a/apps/cli/src/commands/results/serve.ts
+++ b/apps/cli/src/commands/results/serve.ts
@@ -274,49 +274,103 @@ function inferExperimentFromRunId(runId: string): string | undefined {
   return experiment;
 }
 
+const DEFAULT_RUN_PAGE_LIMIT = 50;
+
+// Interprets the `limit` query value: undefined when the parameter is absent, null when it is
+// present but not a positive base-10 integer, otherwise the parsed number.
+function parseRunPageLimit(limitParam: string | undefined): number | undefined | null {
+  if (limitParam === undefined) {
+    return undefined;
+  }
+  const isDigitsOnly = /^\d+$/.test(limitParam);
+  const parsed = isDigitsOnly ? Number.parseInt(limitParam, 10) : 0;
+  return parsed > 0 ? parsed : null;
+}
+
+function paginateRuns<T extends { filename: string }>(
+  runs: T[],
+  cursor: string | undefined,
+  limit: number | undefined,
+): { runs: T[]; nextCursor?: string } {
+  // Without a limit the caller gets the full, unpaginated list.
+  if (limit === undefined) {
+    return { runs };
+  }
+
+  // A page starts at index 0, or just after the run named by `cursor`.
+  let start = 0;
+  if (cursor) {
+    const cursorIndex = runs.findIndex((run) => run.filename === cursor);
+    if (cursorIndex === -1) {
+      // Unknown cursor: answer with an empty page and no continuation.
+      return { runs: [] };
+    }
+    start = cursorIndex + 1;
+  }
+
+  const end = start + limit;
+  const page = runs.slice(start, end);
+  const last = page.at(-1);
+  // Emit `nextCursor` only when runs remain beyond this page.
+  if (end < runs.length && last !== undefined) {
+    return { runs: page, nextCursor: last.filename };
+  }
+  return { runs: page };
+}
+
 async function handleRuns(c: C, { searchDir, agentvDir }: DataContext) {
   const { runs: metas } = await listMergedResultFiles(searchDir);
   const { threshold: passThreshold } = loadStudioConfig(agentvDir);
-  return c.json({
-    runs: metas.map((m) => {
-      let target: string | undefined;
-      let experiment = inferExperimentFromRunId(m.raw_filename);
-      let passRate = m.passRate;
-      try {
-        const records = loadLightweightResults(m.path);
-        if (records.length > 0) {
-          target = records[0].target;
-          experiment = records[0].experiment ?? experiment;
-          passRate = records.filter((r) => r.score >= passThreshold).length / records.length;
-        } else {
-          // Run is in-progress with 0 results written yet — fall back to the
-          // in-memory target stored when the Studio launched this run.
-          target = getActiveRunTarget(m.path);
-        }
-      } catch {
-        // ignore enrichment errors
+  // Optional query params: `limit` (page size) and `cursor` (filename of the last run seen).
+  const parsedLimit = parseRunPageLimit(c.req.query('limit'));
+  if (parsedLimit === null) {
+    return c.json({ error: 'limit must be a positive integer' }, 400);
+  }
+  const cursor = c.req.query('cursor');
+  const limit = parsedLimit ?? (cursor ? DEFAULT_RUN_PAGE_LIMIT : undefined);
+  // Paginate first so the per-run enrichment below only runs for the runs being returned.
+  const page = paginateRuns(metas, cursor, limit);
+  const runs = page.runs.map((m) => {
+    let target: string | undefined;
+    let experiment = inferExperimentFromRunId(m.raw_filename);
+    let passRate = m.passRate;
+    try {
+      const records = loadLightweightResults(m.path);
+      if (records.length > 0) {
+        target = records[0].target;
+        experiment = records[0].experiment ?? experiment;
+        passRate = records.filter((r) => r.score >= passThreshold).length / records.length;
+      } else {
+        // Run is in-progress with 0 results written yet — fall back to the
+        // in-memory target stored when the Studio launched this run.
+        target = getActiveRunTarget(m.path);
       }
-      // Surface live status for Studio-launched runs that are still starting
-      // or running so the RunList can render a spinner instead of the
-      // pass/fail dot derived from a 0% pass rate.
-      const liveStatus = getActiveRunStatus(m.path);
-      const tagsEntry = readRunTags(m.path);
-      return {
-        filename: m.filename,
-        display_name: m.displayName,
-        path: m.path,
-        timestamp: m.timestamp,
-        test_count: m.testCount,
-        pass_rate: passRate,
-        avg_score: m.avgScore,
-        size_bytes: m.sizeBytes,
-        source: m.source,
-        ...(target && { target }),
-        ...(experiment && { experiment }),
-        ...(tagsEntry && { tags: tagsEntry.tags }),
-        ...(liveStatus && { status: liveStatus }),
-      };
-    }),
+    } catch {
+      // ignore enrichment errors
+    }
+    // Surface live status for Studio-launched runs that are still starting or running so the
+    // RunList can render a spinner instead of the pass/fail dot derived from a 0% pass rate.
+    const liveStatus = getActiveRunStatus(m.path);
+    const tagsEntry = readRunTags(m.path);
+    return {
+      filename: m.filename,
+      display_name: m.displayName,
+      path: m.path,
+      timestamp: m.timestamp,
+      test_count: m.testCount,
+      pass_rate: passRate,
+      avg_score: m.avgScore,
+      size_bytes: m.sizeBytes,
+      source: m.source,
+      ...(target && { target }),
+      ...(experiment && { experiment }),
+      ...(tagsEntry && { tags: tagsEntry.tags }),
+      ...(liveStatus && { status: liveStatus }),
+    };
+  });
+  return c.json({
+    runs,
+    ...(page.nextCursor ? { next_cursor: page.nextCursor } : {}),
   });
 }
 
diff --git a/apps/cli/test/commands/eval/pipeline/pipeline-e2e.test.ts b/apps/cli/test/commands/eval/pipeline/pipeline-e2e.test.ts
index d2412643..a2e69585 100644
--- a/apps/cli/test/commands/eval/pipeline/pipeline-e2e.test.ts
+++ b/apps/cli/test/commands/eval/pipeline/pipeline-e2e.test.ts
@@ -1,66 +1,77 @@
-import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
+import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
 import { join } from 'node:path';
-import { afterEach, describe, expect, it } from 'vitest';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
 
 const FIXTURE_DIR = join(import.meta.dirname, 'fixtures');
-const OUT_DIR = join(import.meta.dirname, '__tmp_pipeline_e2e__');
 const CLI_ENTRY = join(import.meta.dirname, '../../../../src/cli.ts');
 const EVAL_PATH = join(FIXTURE_DIR, 'input-test.eval.yaml');
+const PIPELINE_E2E_TIMEOUT_MS = 60_000;
 
 describe('eval pipeline e2e', () => {
+  let outDir: string;
+
+  beforeEach(async () => {
+    outDir = await mkdtemp(join(tmpdir(), 'agentv-pipeline-e2e-'));
+  });
+
   afterEach(async () => {
-    await rm(OUT_DIR, { recursive: true, force: true });
+    await rm(outDir, { recursive: true, force: true });
   });
 
-  it('runs full input → grade → bench pipeline', async () => {
-    const { execa } = await import('execa');
+  it(
+    'runs full input → grade → bench pipeline',
+    async () => {
+      const { execa } = await import('execa');
 
-    // Step 1: pipeline input
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', OUT_DIR]);
-    const manifest = JSON.parse(await readFile(join(OUT_DIR, 'manifest.json'), 'utf8'));
-    expect(manifest.test_ids).toEqual(['test-01']);
+      // Step 1: pipeline input
+      await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', outDir]);
+      const manifest = JSON.parse(await readFile(join(outDir, 'manifest.json'), 'utf8'));
+      expect(manifest.test_ids).toEqual(['test-01']);
 
-    // Step 2: Write mock response.md (simulating target execution)
-    await writeFile(join(OUT_DIR, 'input-test', 'test-01', 'response.md'), 'hello world response');
+      // Step 2: Write mock response.md (simulating target execution)
+      await writeFile(join(outDir, 'input-test', 'test-01', 'response.md'), 'hello world response');
 
-    // Step 3: pipeline grade
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'grade', OUT_DIR]);
-    const gradeResult = JSON.parse(
-      await readFile(
-        join(OUT_DIR, 'input-test', 'test-01', 'code_grader_results', 'contains_hello.json'),
-        'utf8',
-      ),
-    );
-    expect(gradeResult.score).toBe(1);
+      // Step 3: pipeline grade
+      await execa('bun', [CLI_ENTRY, 'pipeline', 'grade', outDir]);
+      const gradeResult = JSON.parse(
+        await readFile(
+          join(outDir, 'input-test', 'test-01', 'code_grader_results', 'contains_hello.json'),
+          'utf8',
+        ),
+      );
+      expect(gradeResult.score).toBe(1);
 
-    // Step 4: Write mock LLM grader result to disk, then run pipeline bench
-    const llmResultsDir = join(OUT_DIR, 'input-test', 'test-01', 'llm_grader_results');
-    await mkdir(llmResultsDir, { recursive: true });
-    await writeFile(
-      join(llmResultsDir, 'relevance.json'),
-      JSON.stringify({
-        score: 0.9,
-        assertions: [{ text: 'Response is relevant', passed: true, evidence: 'echoes input' }],
-      }),
-    );
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'bench', OUT_DIR]);
+      // Step 4: Write mock LLM grader result to disk, then run pipeline bench
+      const llmResultsDir = join(outDir, 'input-test', 'test-01', 'llm_grader_results');
+      await mkdir(llmResultsDir, { recursive: true });
+      await writeFile(
+        join(llmResultsDir, 'relevance.json'),
+        JSON.stringify({
+          score: 0.9,
+          assertions: [{ text: 'Response is relevant', passed: true, evidence: 'echoes input' }],
+        }),
+      );
+      await execa('bun', [CLI_ENTRY, 'pipeline', 'bench', outDir]);
 
-    // Verify final artifacts
-    const grading = JSON.parse(
-      await readFile(join(OUT_DIR, 'input-test', 'test-01', 'grading.json'), 'utf8'),
-    );
-    expect(grading.graders).toHaveLength(2);
-    expect(grading.summary.pass_rate).toBeGreaterThan(0);
+      // Verify final artifacts
+      const grading = JSON.parse(
+        await readFile(join(outDir, 'input-test', 'test-01', 'grading.json'), 'utf8'),
+      );
+      expect(grading.graders).toHaveLength(2);
+      expect(grading.summary.pass_rate).toBeGreaterThan(0);
 
-    const indexContent = await readFile(join(OUT_DIR, 'index.jsonl'), 'utf8');
-    const indexLines = indexContent
-      .trim()
-      .split('\n')
-      .map((line) => JSON.parse(line));
-    expect(indexLines).toHaveLength(1);
-    expect(indexLines[0].test_id).toBe('test-01');
+      const indexContent = await readFile(join(outDir, 'index.jsonl'), 'utf8');
+      const indexLines = indexContent
+        .trim()
+        .split('\n')
+        .map((line) => JSON.parse(line));
+      expect(indexLines).toHaveLength(1);
+      expect(indexLines[0].test_id).toBe('test-01');
 
-    const benchmark = JSON.parse(await readFile(join(OUT_DIR, 'benchmark.json'), 'utf8'));
-    expect(benchmark.run_summary).toBeDefined();
-  }, 30_000);
+      const benchmark = JSON.parse(await readFile(join(outDir, 'benchmark.json'), 'utf8'));
+      expect(benchmark.run_summary).toBeDefined();
+    },
+    PIPELINE_E2E_TIMEOUT_MS,
+  );
 });
diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts
index 75f286fb..446460f4 100644
--- a/apps/cli/test/commands/results/serve.test.ts
+++ b/apps/cli/test/commands/results/serve.test.ts
@@ -1,4 +1,5 @@
 import { afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test';
+import { execSync } from 'node:child_process';
 import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
 import os from 'node:os';
 import { tmpdir } from 'node:os';
@@ -58,6 +59,79 @@ function toJsonl(...records: object[]): string {
   return `${records.map((r) => JSON.stringify(r)).join('\n')}\n`;
 }
 
+// Copy of process.env minus every GIT_* variable; only GIT_SSH_COMMAND is kept.
+function cleanGitEnv(): Record<string, string> {
+  return Object.fromEntries(
+    Object.entries(process.env).filter(
+      (entry): entry is [string, string] =>
+        entry[1] !== undefined && (entry[0] === 'GIT_SSH_COMMAND' || !entry[0].startsWith('GIT_')),
+    ),
+  );
+}
+
+function git(command: string, cwd: string): string {
+  return execSync(command, { cwd, encoding: 'utf8', env: cleanGitEnv() }).trim();
+}
+
+function initializeRemoteRepo(rootDir: string): { remoteDir: string; cloneDir: string } {
+  const remoteDir = path.join(rootDir, 'results-remote.git');
+  git(`git init --bare --initial-branch=main --quiet "${remoteDir}"`, rootDir);
+
+  const seedDir = path.join(rootDir, 'results-seed');
+  git(`git clone --quiet "${remoteDir}" "${seedDir}"`, rootDir);
+  git('git config user.email "test@example.com"', seedDir);
+  git('git config user.name "Test User"', seedDir);
+  writeFileSync(path.join(seedDir, 'README.md'), '# results repo\n');
+  git('git add README.md && git commit --quiet -m "seed repo"', seedDir);
+  git('git push --quiet origin main', seedDir);
+
+  const cloneDir = path.join(rootDir, 'results-clone');
+  git(`git clone --quiet "${remoteDir}" "${cloneDir}"`, rootDir);
+  git('git config user.email "test@example.com"', cloneDir);
+  git('git config user.name "Test User"', cloneDir);
+
+  return { remoteDir, cloneDir };
+}
+
+function writeRemoteRunArtifact(
+  cloneDir: string,
+  experiment: string,
+  timestamp: string,
+  resultRecord: object,
+): string {
+  const isoTimestamp = timestamp.replace(
+    /^(\d{4}-\d{2}-\d{2})T(\d{2})-(\d{2})-(\d{2})-(\d{3})Z$/,
+    '$1T$2:$3:$4.$5Z',
+  );
+  const runDir = path.join(cloneDir, 'runs', experiment, timestamp);
+  mkdirSync(runDir, { recursive: true });
+  writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(resultRecord));
+  writeFileSync(
+    path.join(runDir, 'benchmark.json'),
+    JSON.stringify(
+      {
+        metadata: {
+          timestamp: isoTimestamp,
+          experiment,
+          targets: ['gpt-4o'],
+          tests_run: ['test-greeting'],
+        },
+        run_summary: {
+          'gpt-4o': {
+            pass_rate: { mean: 1 },
+          },
+        },
+      },
+      null,
+      2,
+    ),
+  );
+  git(`git add "${runDir}" && git commit --quiet -m "add ${experiment}"`, cloneDir);
+  git('git push --quiet origin main', cloneDir);
+  git('git fetch --quiet origin --prune', cloneDir);
+  return `${experiment}::${timestamp}`;
+}
+
 // ── resolveSourceFile ────────────────────────────────────────────────────
 
 describe('resolveSourceFile', () => {
@@ -392,6 +466,12 @@ describe('serve app', () => {
   // ── GET /api/runs ───────────────────────────────────────────────────
 
   describe('GET /api/runs', () => {
+    function createLocalRun(baseDir: string, filename: string, ...records: object[]) {
+      const runDir = path.join(baseDir, '.agentv', 'results', 'runs', filename);
+      mkdirSync(runDir, { recursive: true });
+      writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(...records));
+    }
+
     it('returns empty runs list for temp directory', async () => {
       const app = createApp([], tempDir, undefined, undefined, { studioDir });
       const res = await app.request('/api/runs');
@@ -400,6 +480,65 @@ describe('serve app', () => {
       expect(data.runs).toEqual([]);
     });
 
+    it('supports cursor pagination when limit is provided', async () => {
+      createLocalRun(tempDir, '2026-03-25T10-00-00-000Z', RESULT_A);
+      createLocalRun(tempDir, '2026-03-25T11-00-00-000Z', RESULT_A);
+      createLocalRun(tempDir, '2026-03-25T12-00-00-000Z', RESULT_A);
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+
+      const firstRes = await app.request('/api/runs?limit=2');
+      expect(firstRes.status).toBe(200);
+      const firstPage = (await firstRes.json()) as {
+        runs: Array<{ filename: string }>;
+        next_cursor?: string;
+      };
+      expect(firstPage.runs.map((run) => run.filename)).toEqual([
+        '2026-03-25T12-00-00-000Z',
+        '2026-03-25T11-00-00-000Z',
+      ]);
+      expect(firstPage.next_cursor).toBe('2026-03-25T11-00-00-000Z');
+
+      const secondRes = await app.request(
+        `/api/runs?limit=2&cursor=${encodeURIComponent(firstPage.next_cursor ?? '')}`,
+      );
+      expect(secondRes.status).toBe(200);
+      const secondPage = (await secondRes.json()) as {
+        runs: Array<{ filename: string }>;
+        next_cursor?: string;
+      };
+      expect(secondPage.runs.map((run) => run.filename)).toEqual(['2026-03-25T10-00-00-000Z']);
+      expect(secondPage.next_cursor).toBeUndefined();
+    });
+
+    it('returns an empty page for unknown cursors', async () => {
+      createLocalRun(tempDir, '2026-03-25T10-00-00-000Z', RESULT_A);
+      createLocalRun(tempDir, '2026-03-25T11-00-00-000Z', RESULT_A);
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const res = await app.request('/api/runs?limit=1&cursor=missing-run');
+
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as {
+        runs: Array<{ filename: string }>;
+        next_cursor?: string;
+      };
+      expect(data.runs).toEqual([]);
+      expect(data.next_cursor).toBeUndefined();
+    });
+
+    it('rejects invalid pagination limits', async () => {
+      createLocalRun(tempDir, '2026-03-25T10-00-00-000Z', RESULT_A);
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const res = await app.request('/api/runs?limit=0');
+
+      expect(res.status).toBe(400);
+      await expect(res.json()).resolves.toEqual({
+        error: 'limit must be a positive integer',
+      });
+    });
+
     it('tags local runs with source metadata', async () => {
       const runsDir = path.join(tempDir, '.agentv', 'results', 'runs');
       mkdirSync(runsDir, { recursive: true });
@@ -501,18 +640,15 @@ describe('serve app', () => {
         writeFileSync(
           path.join(tempDir, '.agentv', 'config.yaml'),
           `results:
+  mode: github
   repo: EntityProcess/agentv-evals
-  path: autopilot-dev/runs
 `,
         );
 
         const remoteRunDir = path.join(
           process.env.AGENTV_HOME,
-          'cache',
-          'results-repo',
+          'results',
           'EntityProcess-agentv-evals',
-          'repo',
-          'autopilot-dev',
           'runs',
           'default',
           '2026-03-26T10-00-00-000Z',
@@ -540,6 +676,53 @@ describe('serve app', () => {
         }
       }
     });
+
+    it('lists and loads git-native remote runs from the configured clone path', async () => {
+      const { remoteDir, cloneDir } = initializeRemoteRepo(tempDir);
+      const runId = writeRemoteRunArtifact(
+        cloneDir,
+        'green-uat',
+        '2026-03-26T10-00-00-000Z',
+        RESULT_A,
+      );
+
+      mkdirSync(path.join(tempDir, '.agentv'), { recursive: true });
+      writeFileSync(
+        path.join(tempDir, '.agentv', 'config.yaml'),
+        `results:
+  mode: github
+  repo: file://${remoteDir}
+  path: ${cloneDir}
+`,
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+
+      const listRes = await app.request('/api/runs');
+      expect(listRes.status).toBe(200);
+      const listData = (await listRes.json()) as {
+        runs: Array<{ filename: string; source: string; experiment?: string; pass_rate?: number }>;
+      };
+      expect(listData.runs).toHaveLength(1);
+      expect(listData.runs[0]).toMatchObject({
+        filename: `remote::${runId}`,
+        source: 'remote',
+        experiment: 'green-uat',
+        pass_rate: 1,
+      });
+
+      const detailRes = await app.request(
+        `/api/runs/${encodeURIComponent(listData.runs[0].filename)}`,
+      );
+      expect(detailRes.status).toBe(200);
+      const detailData = (await detailRes.json()) as {
+        source: string;
+        results: Array<{ test_id?: string; testId?: string }>;
+      };
+      expect(detailData.source).toBe('remote');
+      expect(detailData.results).toHaveLength(1);
+      expect(detailData.results[0]).toMatchObject({ testId: 'test-greeting' });
+    }, 15000);
   });
 
   describe('GET /api/projects/all-runs', () => {
@@ -581,29 +764,42 @@ describe('serve app', () => {
 
   describe('GET /api/remote/status', () => {
     it('reports configured remote status with graceful local-only fallback', async () => {
-      mkdirSync(path.join(tempDir, '.agentv'), { recursive: true });
-      writeFileSync(
-        path.join(tempDir, '.agentv', 'config.yaml'),
-        `results:
+      const previousHome = process.env.AGENTV_HOME;
+      process.env.AGENTV_HOME = path.join(tempDir, 'agentv-home-status');
+      try {
+        mkdirSync(path.join(tempDir, '.agentv'), { recursive: true });
+        writeFileSync(
+          path.join(tempDir, '.agentv', 'config.yaml'),
+          `results:
+  mode: github
   repo: EntityProcess/agentv-evals
-  path: autopilot-dev/runs
 `,
-      );
+        );
 
-      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
-      const res = await app.request('/api/remote/status');
+        const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+        const res = await app.request('/api/remote/status');
 
-      expect(res.status).toBe(200);
-      const data = (await res.json()) as {
-        configured: boolean;
-        available: boolean;
-        repo: string;
-        path: string;
-      };
-      expect(data.configured).toBe(true);
-      expect(data.available).toBe(false);
-      expect(data.repo).toBe('EntityProcess/agentv-evals');
-      expect(data.path).toBe('autopilot-dev/runs');
+        expect(res.status).toBe(200);
+        const data = (await res.json()) as {
+          configured: boolean;
+          available: boolean;
+          repo: string;
+          path: string;
+        };
+        expect(data.configured).toBe(true);
+        expect(data.available).toBe(false);
+        expect(data.repo).toBe('EntityProcess/agentv-evals');
+        expect(data.path).toBe(
+          path.join(tempDir, 'agentv-home-status', 'results', 'EntityProcess-agentv-evals'),
+        );
+      } finally {
+        // Delete the key; Node coerces process.env assignments to strings ("undefined").
+        if (previousHome === undefined) {
+          Reflect.deleteProperty(process.env, 'AGENTV_HOME');
+        } else {
+          process.env.AGENTV_HOME = previousHome;
+        }
+      }
     });
   });
 
diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts
index 3ada5bb4..8db576c6 100644
--- a/apps/cli/test/eval.integration.test.ts
+++ b/apps/cli/test/eval.integration.test.ts
@@ -20,7 +20,6 @@ const __dirname = path.dirname(__filename);
 const projectRoot = path.resolve(__dirname, '../../..');
 const CLI_ENTRY = path.join(projectRoot, 'apps/cli/src/cli.ts');
 const MOCK_RUNNER = path.join(projectRoot, 'apps/cli/test/fixtures/mock-run-evaluation.ts');
-
 async function createFixture(): Promise<EvalFixture> {
   const baseDir = await mkdtemp(path.join(tmpdir(), 'agentv-cli-test-'));
   const suiteDir = path.join(baseDir, 'suite');
@@ -201,22 +200,6 @@ async function readDiagnostics(fixture: EvalFixture): Promise<Record<string, unk
 }
 
 describe('agentv eval CLI', () => {
-  it('documents the bare `eval` shorthand in eval help', async () => {
-    const fixture = await createFixture();
-    try {
-      const { stdout } = await runCli(fixture, ['eval', '--help']);
-
-      expect(stdout).toContain('Evaluation commands.');
-      expect(stdout).toContain('agentv eval <eval-paths...>');
-      expect(stdout).toContain('agentv eval run <eval-paths...>');
-      expect(stdout).toContain('- run');
-      expect(stdout).toContain('- assert');
-      expect(stdout).toContain('- aggregate');
-    } finally {
-      await rm(fixture.baseDir, { recursive: true, force: true });
-    }
-  });
-
   it('writes results, summary, and prompt dumps using default directories', async () => {
     const fixture = await createFixture();
     try {
diff --git a/apps/studio/src/components/RunList.tsx b/apps/studio/src/components/RunList.tsx
index 974d169a..966cf991 100644
--- a/apps/studio/src/components/RunList.tsx
+++ b/apps/studio/src/components/RunList.tsx
@@ -13,6 +13,7 @@
  */
 
 import type React from 'react';
+import { useEffect, useRef } from 'react';
 
 import { Link } from '@tanstack/react-router';
 
@@ -26,6 +27,9 @@ interface RunListProps {
   runs: RunMeta[];
   projectId?: string;
   emptyMessage?: React.ReactNode;
+  hasNextPage?: boolean;
+  isFetchingNextPage?: boolean;
+  onLoadMore?: () => void;
 }
 
 function formatDate(ts: string | undefined | null): { date: string; full: string } {
@@ -48,9 +52,50 @@ function formatDate(ts: string | undefined | null): { date: string; full: string
   }
 }
 
-export function RunList({ runs, projectId, emptyMessage }: RunListProps) {
+export function RunList({
+  runs,
+  projectId,
+  emptyMessage,
+  hasNextPage = false,
+  isFetchingNextPage = false,
+  onLoadMore,
+}: RunListProps) {
   const { data: config } = useStudioConfig(projectId);
   const passThreshold = config?.threshold ?? DEFAULT_PASS_THRESHOLD;
+  const sentinelRef = useRef<HTMLTableRowElement | null>(null);
+  const requestingNextPageRef = useRef(false);
+
+  useEffect(() => {
+    if (!isFetchingNextPage) {
+      requestingNextPageRef.current = false;
+    }
+  }, [isFetchingNextPage]);
+
+  useEffect(() => {
+    if (!hasNextPage || !onLoadMore) {
+      return;
+    }
+    const node = sentinelRef.current;
+    if (!node) {
+      return;
+    }
+
+    const observer = new IntersectionObserver(
+      (entries) => {
+        if (
+          entries.some((entry) => entry.isIntersecting) &&
+          !isFetchingNextPage &&
+          !requestingNextPageRef.current
+        ) {
+          requestingNextPageRef.current = true;
+          onLoadMore();
+        }
+      },
+      { rootMargin: '200px 0px' },
+    );
+    observer.observe(node);
+    return () => observer.disconnect();
+  }, [hasNextPage, isFetchingNextPage, onLoadMore]);
 
   if (runs.length === 0) {
     return (
@@ -155,6 +200,13 @@ export function RunList({ runs, projectId, emptyMessage }: RunListProps) {
               </tr>
             );
           })}
+          {(hasNextPage || isFetchingNextPage) && (
+            <tr ref={sentinelRef}>
+              <td colSpan={7} className="px-4 py-3 text-center text-xs text-gray-500">
+                {isFetchingNextPage ? 'Loading more runs…' : 'Scroll to load more…'}
+              </td>
+            </tr>
+          )}
         </tbody>
       </table>
     </div>
diff --git a/apps/studio/src/lib/api.ts b/apps/studio/src/lib/api.ts
index 883663c8..67e51fc6 100644
--- a/apps/studio/src/lib/api.ts
+++ b/apps/studio/src/lib/api.ts
@@ -5,7 +5,12 @@
  * and the same-origin Hono server serves in production.
  */
 
-import { queryOptions, useQuery } from '@tanstack/react-query';
+import {
+  infiniteQueryOptions,
+  queryOptions,
+  useInfiniteQuery,
+  useQuery,
+} from '@tanstack/react-query';
 
 import type {
   CategoriesResponse,
@@ -59,12 +64,40 @@ async function fetchText(url: string): Promise<string | null> {
 
 // ── Query option factories ──────────────────────────────────────────────
 
+const RUNS_PAGE_LIMIT = 50;
+
+function buildRunListUrl(baseUrl: string, cursor?: string): string {
+  // Always send `limit`; append `cursor` only when it is a non-empty string.
+  const query = new URLSearchParams(
+    cursor ? { limit: String(RUNS_PAGE_LIMIT), cursor } : { limit: String(RUNS_PAGE_LIMIT) },
+  );
+  return `${baseUrl}?${query.toString()}`;
+}
+
+function flattenRunListPages(pages: RunListResponse[] | undefined): RunListResponse {
+  // No pages available (undefined or empty array): present an empty run list.
+  const lastPage = pages?.at(-1);
+  if (!pages || !lastPage) {
+    return { runs: [] };
+  }
+  // Concatenate runs in page order and carry over the last page's cursor.
+  return { runs: pages.flatMap((page) => page.runs), next_cursor: lastPage.next_cursor };
+}
+
 export const runListOptions = queryOptions({
   queryKey: ['runs'],
   queryFn: () => fetchJson<RunListResponse>('/api/runs'),
   refetchInterval: 5_000,
 });
 
+export const infiniteRunListOptions = infiniteQueryOptions({
+  queryKey: ['runs', 'infinite'],
+  initialPageParam: undefined as string | undefined,
+  queryFn: ({ pageParam }) => fetchJson<RunListResponse>(buildRunListUrl('/api/runs', pageParam)),
+  getNextPageParam: (lastPage) => lastPage.next_cursor,
+  refetchInterval: 5_000,
+});
+
 export function runDetailOptions(filename: string) {
   return queryOptions({
     queryKey: ['runs', filename],
@@ -206,6 +239,14 @@ export function useRunList() {
   return useQuery(runListOptions);
 }
 
+export function useInfiniteRunList() {
+  const query = useInfiniteQuery(infiniteRunListOptions);
+  return {
+    ...query,
+    data: flattenRunListPages(query.data?.pages),
+  };
+}
+
 export function useRunDetail(filename: string) {
   return useQuery(runDetailOptions(filename));
 }
@@ -327,10 +368,30 @@ export function projectRunListOptions(projectId: string) {
   });
 }
 
+export function infiniteProjectRunListOptions(projectId: string) {
+  return infiniteQueryOptions({
+    queryKey: ['projects', projectId, 'runs', 'infinite'],
+    initialPageParam: undefined as string | undefined,
+    queryFn: ({ pageParam }) =>
+      fetchJson<RunListResponse>(buildRunListUrl(`${projectApiBase(projectId)}/runs`, pageParam)),
+    getNextPageParam: (lastPage) => lastPage.next_cursor,
+    enabled: !!projectId,
+    refetchInterval: 5_000,
+  });
+}
+
 export function useProjectRunList(projectId: string) {
   return useQuery(projectRunListOptions(projectId));
 }
 
+export function useInfiniteProjectRunList(projectId: string) {
+  const query = useInfiniteQuery(infiniteProjectRunListOptions(projectId));
+  return {
+    ...query,
+    data: flattenRunListPages(query.data?.pages),
+  };
+}
+
 export function projectRunDetailOptions(projectId: string, filename: string) {
   return queryOptions({
     queryKey: ['projects', projectId, 'runs', filename],
diff --git a/apps/studio/src/lib/types.ts b/apps/studio/src/lib/types.ts
index 748300a6..595babb0 100644
--- a/apps/studio/src/lib/types.ts
+++ b/apps/studio/src/lib/types.ts
@@ -32,6 +32,7 @@ export interface RunMeta {
 
 export interface RunListResponse {
   runs: RunMeta[];
+  next_cursor?: string;
 }
 
 export interface TokenUsage {
@@ -257,7 +258,7 @@ export interface RemoteStatusResponse {
   configured: boolean;
   available: boolean;
   repo?: string;
-  cache_dir?: string;
+  local_dir?: string;
   path?: string;
   auto_push?: boolean;
   branch_prefix?: string;
diff --git a/apps/studio/src/routes/index.tsx b/apps/studio/src/routes/index.tsx
index 8461ab54..921889c6 100644
--- a/apps/studio/src/routes/index.tsx
+++ b/apps/studio/src/routes/index.tsx
@@ -22,12 +22,13 @@ import {
   syncRemoteResultsApi,
   useCompare,
   useEvalRuns,
+  useInfiniteRunList,
   useProjectList,
   useRemoteStatus,
-  useRunList,
   useStudioConfig,
 } from '~/lib/api';
 import { type StudioTabId, resolveIndexRoute } from '~/lib/navigation';
+import type { RunMeta } from '~/lib/types';
 type TabId = StudioTabId;
 
 const tabs: { id: TabId; label: string }[] = [
@@ -184,7 +185,8 @@ function SingleProjectHome() {
   const tab = searchParams.tab as TabId | undefined;
   const navigate = useNavigate();
   const queryClient = useQueryClient();
-  const { data, isLoading, error } = useRunList();
+  const { data, isLoading, error, hasNextPage, fetchNextPage, isFetchingNextPage } =
+    useInfiniteRunList();
   const { data: remoteStatus } = useRemoteStatus();
   const { data: config } = useStudioConfig();
   const [showRunEval, setShowRunEval] = useState(false);
@@ -265,6 +267,9 @@ function SingleProjectHome() {
           remoteStatus={remoteStatus}
           syncInFlight={syncInFlight}
           onSyncRemote={handleSyncRemote}
+          hasNextPage={hasNextPage}
+          isFetchingNextPage={isFetchingNextPage}
+          onLoadMore={() => void fetchNextPage()}
         />
       )}
       {activeTab === 'experiments' && <ExperimentsTab />}
@@ -298,8 +303,11 @@ function RunsTabContent({
   remoteStatus,
   syncInFlight,
   onSyncRemote,
+  hasNextPage,
+  isFetchingNextPage,
+  onLoadMore,
 }: {
-  runs: NonNullable<ReturnType<typeof useRunList>['data']>['runs'];
+  runs: RunMeta[];
   isLoading: boolean;
   error: Error | null;
   sourceFilter: RunSourceFilter;
@@ -307,6 +315,9 @@ function RunsTabContent({
   remoteStatus: ReturnType<typeof useRemoteStatus>['data'];
   syncInFlight: boolean;
   onSyncRemote: () => void;
+  hasNextPage: boolean | undefined;
+  isFetchingNextPage: boolean;
+  onLoadMore: () => void;
 }) {
   if (isLoading) {
     return <LoadingSkeleton />;
@@ -332,6 +343,9 @@ function RunsTabContent({
       />
       <RunList
         runs={runs}
+        hasNextPage={hasNextPage}
+        isFetchingNextPage={isFetchingNextPage}
+        onLoadMore={onLoadMore}
         emptyMessage={
           sourceFilter === 'remote' ? (
             remoteStatus?.configured ? (
diff --git a/apps/studio/src/routes/projects/$projectId.tsx b/apps/studio/src/routes/projects/$projectId.tsx
index bb54cc72..62154143 100644
--- a/apps/studio/src/routes/projects/$projectId.tsx
+++ b/apps/studio/src/routes/projects/$projectId.tsx
@@ -18,7 +18,7 @@ import {
   projectCompareOptions,
   syncRemoteResultsApi,
   useEvalRuns,
-  useProjectRunList,
+  useInfiniteProjectRunList,
   useRemoteStatus,
   useStudioConfig,
 } from '~/lib/api';
@@ -109,7 +109,8 @@ function ProjectHomePage() {
 
 function ProjectRunsTab({ projectId }: { projectId: string }) {
   const queryClient = useQueryClient();
-  const { data, isLoading, error } = useProjectRunList(projectId);
+  const { data, isLoading, error, hasNextPage, fetchNextPage, isFetchingNextPage } =
+    useInfiniteProjectRunList(projectId);
   const { data: activeRunsData } = useEvalRuns(projectId);
   const { data: remoteStatus } = useRemoteStatus(projectId);
   const [sourceFilter, setSourceFilter] = useState<RunSourceFilter>('all');
@@ -195,7 +196,13 @@ function ProjectRunsTab({ projectId }: { projectId: string }) {
         syncInFlight={syncInFlight}
         onSync={handleSyncRemote}
       />
-      <RunList runs={filteredRuns} projectId={projectId} />
+      <RunList
+        runs={filteredRuns}
+        projectId={projectId}
+        hasNextPage={hasNextPage}
+        isFetchingNextPage={isFetchingNextPage}
+        onLoadMore={() => void fetchNextPage()}
+      />
     </div>
   );
 }
diff --git a/docs/plans/git-native-results-goal.md b/docs/plans/git-native-results-goal.md
new file mode 100644
index 00000000..d5db62ff
--- /dev/null
+++ b/docs/plans/git-native-results-goal.md
@@ -0,0 +1,42 @@
+# Goal: Complete git-native-results PR (#1261)
+
+## Objective
+Implement the git-native results storage architecture and land PR #1261 as a clean, tested, manually verified change.
+
+## Success Criteria
+- All implementation passes completed per design doc
+- Full test suite green (unit + integration + existing 1782 core + 553 CLI tests)
+- E2E manual test using agent-browser against real test results repo
+- Red/green UAT documented before review
+- No regressions
+
+## Work Location
+- Worktree: `agentv.worktrees/git-native-results/`
+- Branch: `feat/git-native-results`
+
+## Key Decisions Confirmed
+- Dedicated results repo model → write directly to `main` of results repo (no separate branch needed)
+- Use raw `git` subprocess (not go-git) for ls-tree / cat-file path
+- Follow exact order in design doc
+
+## Non-Goals
+- P5 zero-config mode
+- Caching
+- Multi-mode beyond github
+
+## Verification
+1. Automated tests
+2. Manual agent-browser E2E in Studio
+3. Performance check with 500+ runs repo
+4. Lint + typecheck clean
+
+Owner: Agent + Chris T
+
+## Latest Progress (2026-05-21)
+
+- Docker ownership fix implemented in docker-compose.yml (`user: "${UID}:${GID}"`)
+- Write path (`commitAndPushRun`) largely complete via parallel work
+- Read path functional but needs hardening
+- Bun dependencies reinstalled in worktree
+- GitHub Actions currently failing on dependency resolution in CI
+- Next focus: Fix CI, add tests, implement pagination
diff --git a/docs/plans/git-native-results.md b/docs/plans/git-native-results.md
new file mode 100644
index 00000000..1d625f3c
--- /dev/null
+++ b/docs/plans/git-native-results.md
@@ -0,0 +1,162 @@
+# Git-native results storage
+
+- **Status**: design approved, implementation pending
+- **Tracks**: issue #1259 (supersedes closed PR #1260)
+- **Scope**: single PR; breaking changes accepted (no production users yet)
+
+---
+
+## Why
+
+`/api/runs` polls every 5s and does O(N) per-manifest reads (`readdir` + `statSync` + `loadResultFile` per run). At hundreds of runs it stalls; at thousands it falls over. The original PR #1260 tried to fix this with an append-only `index/runs.jsonl` file, which works but adds a second source of truth that can drift, grows forever, and requires a sha-amend dance plus a `reindex` migration command.
+
+After comparing with **entireio** (single-ref + git tree as index) and **skillfully** (explicit `sourceMode = github_import` pattern with PR-based writes for human-curated content), the cleaner architecture treats **git as the canonical store**, not as a transport layer.
+
+## Core idea
+
+The git tree IS the index. `git ls-tree -r origin/main -- runs/` lists every run path without reading any blob. `git cat-file --batch` reads existing `benchmark.json` blobs in one subprocess call. No separate index file. No drift. Natural pruning when runs are deleted. With `--filter=blob:none` clone, individual run blobs are only fetched lazily when a user opens the detail view.
+
+## Architecture
+
+### Storage
+
+- The configured remote `results.repo` is **the** storage location.
+- The local clone at `results.path` (filesystem path) is the working copy.
+- No more `.agentv/results/runs/` writes in the source project. No more gitignored results.
+
+```yaml
+# config.yaml
+results:
+  mode: github                       # required, only valid value today
+  repo: myorg/eval-results           # remote
+  path: ~/data/agentv-results        # optional; default ~/.agentv/results/<slug>/
+  auto_push: true                    # intended default (NOTE: normalizeResultsConfig currently treats an omitted value as false)
+```
+
+`mode: github` is explicit (extension point; mirrors skillfully's `sourceMode` pattern). `path` is the **local filesystem location** of the clone (breaking change — was previously the subdir within the remote repo). Runs always land at `<clone>/runs/<experiment>/<timestamp>/` regardless.
+
+### Writes
+
+Every `agentv eval` is one atomic operation:
+
+1. `git fetch origin --prune` (refresh; no checkout)
+2. Write artifacts into working tree at `<clone>/runs/<experiment>/<timestamp>/`
+3. `git add runs/<experiment>/<timestamp>/`
+4. `git commit -m "<title>" -m "Agentv-Run: <run-id>"` (P6 trailer baked in)
+5. If `auto_push`: `git push origin HEAD:main` with retry-on-non-fast-forward (rebase + retry)
+
+Each run is one commit. Files are unique to that run, so rebases never content-conflict.
+
+### Reads
+
+**Listing** (replaces `listResultFilesFromRunsDir`):
+- `git ls-tree -r origin/main -- runs/` → filter for `benchmark.json` paths
+- `git cat-file --batch` → read those blobs in one subprocess
+- Derive `run_id` from path (same logic as current `buildRunId`)
+- Sort by timestamp descending
+- Apply cursor pagination
+
+**Detail view file reads** (replaces `readFileSync(meta.path)`):
+- Committed: `git cat-file -p origin/main:runs/.../<file>`
+- In-progress (post-write, pre-commit): `readFileSync(<path>)` from working tree
+
+**In-progress detection**: between artifact write and commit, files exist only in the working tree. `git status --porcelain runs/` surfaces them; merge with the committed list for the Studio runs view.
+
+### Sync
+
+- `agentv eval` does its own fetch + push (no separate sync needed for own work)
+- `agentv results sync` = `git fetch origin --prune` (refresh view of others' work)
+- No more `git checkout`, no more `git pull --ff-only`
+- Studio polls `/api/runs` which reads from git object DB (already current after the most recent fetch)
+
+### Pagination
+
+`/api/runs?limit=50&cursor=<run_id>`:
+- Cursor is the `run_id` of the last item from the previous page
+- Server reads the full sorted list (one `git ls-tree` + one `git cat-file --batch`), finds the cursor, slices `[cursorIdx+1 : cursorIdx+1+limit]`, returns `next_cursor` if more remain
+- Studio uses `useInfiniteQuery` + an `IntersectionObserver` sentinel row
+
+## Implementation passes
+
+The PR is large but bounded. Suggested order within the single PR:
+
+### Pass 1 — config + paths
+
+- Update `ResultsConfig` schema: require `mode: github`, repurpose `path` as filesystem location
+- Rename `getResultsRepoCachePaths` → `getResultsRepoLocalPaths`
+- Rename `cache_dir` → `local_dir` in `ResultsRepoStatus` (wire format too)
+- Add config validation: refuse old-style `path: runs` values with migration message
+
+### Pass 2 — write path
+
+- Replace `.agentv/results/runs/` writes with direct writes to `<results.path>/runs/...`
+- `directPushResults` becomes the only write path (rename to `commitAndPushRun` since it's no longer just a "direct push" mode)
+- Add `Agentv-Run:` commit trailer
+- Drop `git checkout` from `updateCacheRepo` — only `git fetch --prune` remains
+- Rename `updateCacheRepo` → `fetchResultsRepo`
+
+### Pass 3 — read path
+
+- New `listResultFilesFromGitTree(repoDir, baseBranch)` using `git ls-tree` + `git cat-file --batch` on `benchmark.json` blobs
+- Replace `listResultFilesFromRunsDir` calls for remote runs with the new function
+- Detail view reads in `serve.ts` use `git cat-file -p <ref>:<path>` for committed runs
+- Working-tree readdir for in-progress runs (detected via `git status --porcelain`)
+- Drop `loadLightweightResults` enrichment loop in `handleRuns` — `benchmark.json` already has `target`, `experiment`, and `pass_rate`
+
+### Pass 4 — pagination
+
+- `/api/runs` accepts `limit` and `cursor` query params
+- Server slices the sorted list by cursor, returns `next_cursor`
+- `RunListResponse` gets `next_cursor?: string`
+- Studio: `runListOptions` → `infiniteQueryOptions`
+- `RunList.tsx`: flatten pages, add `IntersectionObserver` sentinel
+
+### Pass 5 — cleanup
+
+- Remove the entire P1 PR scope (closed PR #1260): `RunIndexEntry`, `appendToRunIndex`, `readRunIndex`, `reindexResultsRepo`, `agentv results reindex` command, `index/runs.jsonl` writes
+- Remove `localResults` listing — local-only mode is no longer supported
+- Remove `SourcedResultFileMeta.source` field — runs are no longer "local" or "remote", they're either committed or in-progress
+- Update docs site (`apps/web/src/content/docs/`)
+- Update skill files (`plugins/agentv-dev/skills/agentv-eval-builder/`)
+- Update examples that hardcoded `.agentv/results/runs/` paths
+
+## Breaking changes
+
+| Change | Impact |
+|--------|--------|
+| `results.mode: github` and `results.repo` become required | Configs missing either are rejected with a warning; users without a results repo can't run evals until they configure one |
+| `results.path` repurposed (subdir → filesystem path) | Existing configs with `path: runs` fail loudly with migration message |
+| No more `.agentv/results/runs/` writes | Project-local results no longer exist; everything lives in the configured `path` |
+| `cache_dir` → `local_dir` in status responses | Studio + any external scripts reading status need to update |
+| `SourcedResultFileMeta.source` removed | Studio "source" badge becomes "in progress / shared" |
+
+These breaking changes are accepted because there are no production users yet. Document them in the release notes, and require a fresh config to upgrade.
+
+## Test plan
+
+- Unit tests for `git ls-tree` + `git cat-file --batch` parsing helpers
+- Integration test that spins up a tmp git repo, writes runs via the new write path, lists via the new read path, asserts results
+- Pagination unit tests (cursor in/out of bounds, exact-boundary cases)
+- E2E: run an actual eval against a real (test-scoped) results repo, verify the commit lands with the `Agentv-Run:` trailer, `git ls-tree` shows the run, Studio renders it
+
+## Deferred to future PRs
+
+- **P5 zero-config same-repo mode** — write to `refs/agentv/runs/v1` in the source repo when no `results.repo` is configured. Independent feature; design pattern works the same.
+- **Multi-mode support** — if a cloud Studio gets built later, `mode: cloud` would mirror skillfully's "managed in Skillfully" mode. The current explicit `mode: github` field is the extension point.
+- **PR-based publishing** — for human-curated content. Eval results are machine-generated, so direct commit is correct. If users want review-before-merge for sensitive evals (e.g., regulatory benchmarks), add `share: auto-pr` later.
+- **In-memory list caching** — P2 from #1259. The git-object-DB read path is fast enough that caching is not needed today. Revisit if profiling shows it's a bottleneck.
+
+## Open implementation questions
+
+1. **Branch model**: `origin/main` or a dedicated `origin/agentv-runs/main`? Current vote: `main`, since this is a dedicated results repo.
+2. **What to do on `git fetch` failures during `agentv eval`**? Current vote: warn, proceed with stale local state, surface the error in Studio. Don't block the eval — local commit always works.
+3. **`gh` CLI dependency**: stays scoped to existing PR-related code paths. The new git-native flow uses raw `git` only.
+
+## What this PR does NOT do
+
+- Doesn't add a separate index file (the index IS the git tree)
+- Doesn't ship a `reindex` migration command (nothing to backfill — `benchmark.json` already exists per run)
+- Doesn't change the artifact format (`benchmark.json`, `index.jsonl`, per-test dirs stay as-is)
+- Doesn't add server-side caching (deferred)
+- Doesn't add PR-based publishing (deferred)
+- Doesn't touch the source repo's commit history (only the configured `results.repo`)
diff --git a/packages/core/src/evaluation/loaders/config-loader.ts b/packages/core/src/evaluation/loaders/config-loader.ts
index b7603f2d..462a79e7 100644
--- a/packages/core/src/evaluation/loaders/config-loader.ts
+++ b/packages/core/src/evaluation/loaders/config-loader.ts
@@ -37,8 +37,10 @@ export type ExecutionDefaults = {
 };
 
 export type ResultsConfig = {
+  readonly mode: 'github';
   readonly repo: string;
-  readonly path: string;
+  /** Local filesystem path for the results clone. Optional; defaults to ~/.agentv/results/<slug>/. */
+  readonly path?: string;
   readonly auto_push?: boolean;
   readonly branch_prefix?: string;
 };
@@ -558,6 +560,16 @@ export function parseExecutionDefaults(
   return Object.keys(result).length > 0 ? (result as ExecutionDefaults) : undefined;
 }
 
+function isFilesystemPath(p: string): boolean {
+  return (
+    p.startsWith('/') ||
+    p.startsWith('~/') ||
+    p.startsWith('~\\') ||
+    p === '~' ||
+    /^[A-Za-z]:[/\\]/.test(p)
+  );
+}
+
 export function parseResultsConfig(raw: unknown, configPath: string): ResultsConfig | undefined {
   if (raw === undefined || raw === null) {
     return undefined;
@@ -568,17 +580,32 @@ export function parseResultsConfig(raw: unknown, configPath: string): ResultsCon
   }
 
   const obj = raw as Record<string, unknown>;
-  const repo = typeof obj.repo === 'string' ? obj.repo.trim() : '';
-  const resultsPath = typeof obj.path === 'string' ? obj.path.trim() : '';
 
+  if (obj.mode !== 'github') {
+    logWarning(`Invalid results.mode in ${configPath}, expected 'github'`);
+    return undefined;
+  }
+
+  const repo = typeof obj.repo === 'string' ? obj.repo.trim() : '';
   if (!repo) {
     logWarning(`Invalid results.repo in ${configPath}, expected non-empty string`);
     return undefined;
   }
 
-  if (!resultsPath) {
-    logWarning(`Invalid results.path in ${configPath}, expected non-empty string`);
-    return undefined;
+  let resultsPath: string | undefined;
+  if (obj.path !== undefined) {
+    if (typeof obj.path !== 'string' || obj.path.trim().length === 0) {
+      logWarning(`Invalid results.path in ${configPath}, expected non-empty string`);
+      return undefined;
+    }
+    const trimmedPath = obj.path.trim();
+    if (!isFilesystemPath(trimmedPath)) {
+      logWarning(
+        `Invalid results.path in ${configPath}: '${trimmedPath}' looks like a repo subdirectory. results.path now specifies the local filesystem directory for the clone (e.g., ~/data/agentv-results). Remove 'path' to use the default or set an absolute/home-relative path.`,
+      );
+      return undefined;
+    }
+    resultsPath = trimmedPath;
   }
 
   if (obj.auto_push !== undefined && typeof obj.auto_push !== 'boolean') {
@@ -596,8 +623,9 @@ export function parseResultsConfig(raw: unknown, configPath: string): ResultsCon
   }
 
   return {
+    mode: 'github',
     repo,
-    path: resultsPath,
+    ...(resultsPath !== undefined && { path: resultsPath }),
     ...(typeof obj.auto_push === 'boolean' && { auto_push: obj.auto_push }),
     ...(branchPrefix && { branch_prefix: branchPrefix }),
   };
diff --git a/packages/core/src/evaluation/results-repo.ts b/packages/core/src/evaluation/results-repo.ts
index 04419785..be0f0aa3 100644
--- a/packages/core/src/evaluation/results-repo.ts
+++ b/packages/core/src/evaluation/results-repo.ts
@@ -1,4 +1,4 @@
-import { execFile } from 'node:child_process';
+import { execFile, spawn } from 'node:child_process';
 import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
 import { cp, mkdtemp, readdir, rm, stat } from 'node:fs/promises';
 import os from 'node:os';
@@ -10,7 +10,7 @@ import type { ResultsConfig } from './loaders/config-loader.js';
 
 const execFileAsync = promisify(execFile);
 
-export interface ResultsRepoCachePaths {
+export interface ResultsRepoLocalPaths {
   readonly rootDir: string;
   readonly repoDir: string;
   readonly statusFile: string;
@@ -23,7 +23,7 @@ export interface ResultsRepoStatus {
   readonly path?: string;
   readonly auto_push?: boolean;
   readonly branch_prefix?: string;
-  readonly cache_dir?: string;
+  readonly local_dir?: string;
   readonly last_synced_at?: string;
   readonly last_error?: string;
 }
@@ -61,10 +61,22 @@ function withFriendlyGitHubAuthError(error: unknown): Error {
   return new Error(message);
 }
 
+function expandHome(p: string): string {
+  if (p === '~' || p.startsWith('~/') || p.startsWith('~\\')) {
+    return path.join(os.homedir(), p.slice(1));
+  }
+  return p;
+}
+
 export function normalizeResultsConfig(config: ResultsConfig): Required<ResultsConfig> {
+  const repo = config.repo.trim();
+  const resolvedPath = config.path
+    ? expandHome(config.path.trim())
+    : path.join(getAgentvHome(), 'results', sanitizeRepoSlug(repo));
   return {
-    repo: config.repo.trim(),
-    path: config.path.trim().replace(/^\/+|\/+$/g, ''),
+    mode: 'github',
+    repo,
+    path: resolvedPath,
     auto_push: config.auto_push === true,
     branch_prefix: config.branch_prefix?.trim() || 'eval-results',
   };
@@ -77,7 +89,7 @@ export function resolveResultsRepoUrl(repo: string): string {
   return `https://github.com/${repo}.git`;
 }
 
-export function getResultsRepoCachePaths(repo: string): ResultsRepoCachePaths {
+export function getResultsRepoLocalPaths(repo: string): ResultsRepoLocalPaths {
   const rootDir = path.join(getAgentvHome(), 'cache', 'results-repo', sanitizeRepoSlug(repo));
   return {
     rootDir,
@@ -106,12 +118,12 @@ function writePersistedStatus(statusFile: string, status: PersistedStatus): void
 async function runCommand(
   executable: string,
   args: readonly string[],
-  options?: { cwd?: string; check?: boolean },
+  options?: { cwd?: string; check?: boolean; env?: NodeJS.ProcessEnv },
 ): Promise<{ stdout: string; stderr: string }> {
   try {
     const { stdout, stderr } = await execFileAsync(executable, [...args], {
       cwd: options?.cwd,
-      env: process.env,
+      env: options?.env ?? process.env,
     });
     return { stdout, stderr };
   } catch (error) {
@@ -126,11 +138,21 @@ async function runCommand(
   }
 }
 
+function getGitEnv(): NodeJS.ProcessEnv {
+  const env: NodeJS.ProcessEnv = {};
+  for (const [key, value] of Object.entries(process.env)) {
+    if (value !== undefined && !(key.startsWith('GIT_') && key !== 'GIT_SSH_COMMAND')) {
+      env[key] = value;
+    }
+  }
+  return env;
+}
+
 async function runGit(
   args: readonly string[],
   options?: { cwd?: string; check?: boolean },
 ): Promise<{ stdout: string; stderr: string }> {
-  return runCommand('git', args, options);
+  return runCommand('git', args, { ...options, env: getGitEnv() });
 }
 
 async function runGh(
@@ -164,14 +186,12 @@ async function resolveDefaultBranch(repoDir: string): Promise<string> {
   return 'main';
 }
 
-async function updateCacheRepo(repoDir: string, baseBranch: string): Promise<void> {
+async function fetchResultsRepo(repoDir: string): Promise<void> {
   await runGit(['fetch', 'origin', '--prune'], { cwd: repoDir });
-  await runGit(['checkout', baseBranch], { cwd: repoDir });
-  await runGit(['pull', '--ff-only', 'origin', baseBranch], { cwd: repoDir });
 }
 
 function updateStatusFile(config: ResultsConfig, patch: PersistedStatus): void {
-  const cachePaths = getResultsRepoCachePaths(config.repo);
+  const cachePaths = getResultsRepoLocalPaths(config.repo);
   const current = readPersistedStatus(cachePaths.statusFile);
   writePersistedStatus(cachePaths.statusFile, {
     ...current,
@@ -181,29 +201,35 @@ function updateStatusFile(config: ResultsConfig, patch: PersistedStatus): void {
 
 export async function ensureResultsRepoClone(config: ResultsConfig): Promise<string> {
   const normalized = normalizeResultsConfig(config);
-  const cachePaths = getResultsRepoCachePaths(normalized.repo);
+  const cachePaths = getResultsRepoLocalPaths(normalized.repo);
+  const cloneDir = normalized.path;
   mkdirSync(cachePaths.rootDir, { recursive: true });
+  mkdirSync(path.dirname(cloneDir), { recursive: true });
 
-  if (!existsSync(cachePaths.repoDir)) {
+  const cloneMissing = !existsSync(cloneDir);
+  const gitDir = path.join(cloneDir, '.git');
+  const cloneEmpty = !cloneMissing && !existsSync(gitDir) && (await readdir(cloneDir)).length === 0;
+
+  if (cloneMissing || cloneEmpty) {
     try {
       await runGit([
         'clone',
         '--filter=blob:none',
         resolveResultsRepoUrl(normalized.repo),
-        cachePaths.repoDir,
+        cloneDir,
       ]);
-      return cachePaths.repoDir;
+      return cloneDir;
     } catch (error) {
       updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
       throw withFriendlyGitHubAuthError(error);
     }
   }
 
-  if (!existsSync(path.join(cachePaths.repoDir, '.git'))) {
-    throw new Error(`Results repo cache is not a git repository: ${cachePaths.repoDir}`);
+  if (!existsSync(gitDir)) {
+    throw new Error(`Results repo clone path is not a git repository: ${cloneDir}`);
   }
 
-  return cachePaths.repoDir;
+  return cloneDir;
 }
 
 export function getResultsRepoStatus(config?: ResultsConfig): ResultsRepoStatus {
@@ -212,22 +238,22 @@ export function getResultsRepoStatus(config?: ResultsConfig): ResultsRepoStatus
       configured: false,
       available: false,
       repo: '',
-      cache_dir: '',
+      local_dir: '',
     };
   }
 
   const normalized = normalizeResultsConfig(config);
-  const cachePaths = getResultsRepoCachePaths(normalized.repo);
-  const persisted = readPersistedStatus(cachePaths.statusFile);
+  const localPaths = getResultsRepoLocalPaths(normalized.repo);
+  const persisted = readPersistedStatus(localPaths.statusFile);
 
   return {
     configured: true,
-    available: existsSync(cachePaths.repoDir),
+    available: existsSync(normalized.path),
     repo: normalized.repo,
     path: normalized.path,
     auto_push: normalized.auto_push,
     branch_prefix: normalized.branch_prefix,
-    cache_dir: cachePaths.repoDir,
+    local_dir: normalized.path,
     last_synced_at: persisted.last_synced_at,
     last_error: persisted.last_error,
   };
@@ -238,8 +264,7 @@ export async function syncResultsRepo(config: ResultsConfig): Promise<ResultsRep
 
   try {
     const repoDir = await ensureResultsRepoClone(normalized);
-    const baseBranch = await resolveDefaultBranch(repoDir);
-    await updateCacheRepo(repoDir, baseBranch);
+    await fetchResultsRepo(repoDir);
     updateStatusFile(normalized, {
       last_synced_at: new Date().toISOString(),
       last_error: undefined,
@@ -261,7 +286,7 @@ export async function checkoutResultsRepoBranch(
   const normalized = normalizeResultsConfig(config);
   const repoDir = await ensureResultsRepoClone(normalized);
   const baseBranch = await resolveDefaultBranch(repoDir);
-  await updateCacheRepo(repoDir, baseBranch);
+  await fetchResultsRepo(repoDir);
   await runGit(['checkout', '-B', branchName, `origin/${baseBranch}`], { cwd: repoDir });
   updateStatusFile(normalized, { last_error: undefined });
   return {
@@ -278,7 +303,7 @@ export async function prepareResultsRepoBranch(
   const normalized = normalizeResultsConfig(config);
   const cloneDir = await ensureResultsRepoClone(normalized);
   const baseBranch = await resolveDefaultBranch(cloneDir);
-  await updateCacheRepo(cloneDir, baseBranch);
+  await fetchResultsRepo(cloneDir);
 
   const worktreeRoot = await mkdtemp(path.join(os.tmpdir(), 'agentv-results-repo-'));
   const worktreeDir = path.join(worktreeRoot, 'repo');
@@ -312,10 +337,7 @@ export async function stageResultsArtifacts(params: {
 
 export function resolveResultsRepoRunsDir(config: ResultsConfig): string {
   const normalized = normalizeResultsConfig(config);
-  return path.join(
-    getResultsRepoCachePaths(normalized.repo).repoDir,
-    ...normalized.path.split('/'),
-  );
+  return path.join(normalized.path, 'runs');
 }
 
 export async function directorySizeBytes(targetPath: string): Promise<number> {
@@ -358,7 +380,7 @@ export async function pushResultsRepoBranch(
 ): Promise<void> {
   const normalized = normalizeResultsConfig(config);
   await runGit(['push', '-u', 'origin', branchName], {
-    cwd: cwd ?? getResultsRepoCachePaths(normalized.repo).repoDir,
+    cwd: cwd ?? normalized.path,
   });
   updateStatusFile(normalized, {
     last_synced_at: new Date().toISOString(),
@@ -399,7 +421,7 @@ const DIRECT_PUSH_MAX_RETRIES = 3;
 
 /**
  * Push results directly to the base branch of the results repo.
- * Handles non-fast-forward conflicts by pulling with rebase and retrying.
+ * Handles non-fast-forward conflicts by fetching, rebasing, and retrying.
  * Returns true if artifacts were pushed, false if no changes were detected.
  */
 export async function directPushResults(params: {
@@ -411,9 +433,9 @@ export async function directPushResults(params: {
   const normalized = normalizeResultsConfig(params.config);
   const repoDir = await ensureResultsRepoClone(normalized);
   const baseBranch = await resolveDefaultBranch(repoDir);
-  await updateCacheRepo(repoDir, baseBranch);
+  await fetchResultsRepo(repoDir);
 
-  const destinationDir = path.join(repoDir, normalized.path, params.destinationPath);
+  const destinationDir = path.join(repoDir, 'runs', params.destinationPath);
   await stageResultsArtifacts({
     repoDir,
     sourceDir: params.sourceDir,
@@ -429,11 +451,20 @@ export async function directPushResults(params: {
     return false;
   }
 
-  await runGit(['commit', '-m', params.commitMessage], { cwd: repoDir });
+  await runGit(
+    [
+      'commit',
+      '-m',
+      params.commitMessage,
+      '-m',
+      `Agentv-Run: ${buildGitRunId(params.destinationPath)}`,
+    ],
+    { cwd: repoDir },
+  );
 
   for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
     try {
-      await runGit(['push', 'origin', baseBranch], { cwd: repoDir });
+      await runGit(['push', 'origin', `HEAD:${baseBranch}`], { cwd: repoDir });
       updateStatusFile(normalized, {
         last_synced_at: new Date().toISOString(),
         last_error: undefined,
@@ -442,7 +473,8 @@ export async function directPushResults(params: {
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes('non-fast-forward')) {
-        await runGit(['pull', '--rebase', 'origin', baseBranch], { cwd: repoDir });
+        await fetchResultsRepo(repoDir);
+        await runGit(['rebase', `origin/${baseBranch}`], { cwd: repoDir });
       } else {
         throw error;
       }
@@ -451,3 +483,217 @@ export async function directPushResults(params: {
 
   return false;
 }
+/** One run discovered by `listGitRuns`, summarised from that run's `benchmark.json`. */
+export interface GitListedRun {
+  run_id: string;
+  experiment: string;
+  timestamp: string;
+  pass_rate?: number;
+  target?: string; // listGitRuns sets this only when the benchmark lists exactly one target
+  manifest_path: string;
+  benchmark_path: string;
+  display_name: string;
+  test_count: number;
+  avg_score: number; // listGitRuns always reports 0 here
+  size_bytes: number; // listGitRuns reports the size of the benchmark.json blob
+}
+/** One blob parsed from `git cat-file --batch` output: the size from its header and its raw bytes. */
+type GitBatchBlob = {
+  readonly size: number;
+  readonly content: Buffer;
+};
+/** The parts of a run's `benchmark.json` that `listGitRuns` reads; every field is optional. */
+type GitRunBenchmark = {
+  readonly metadata?: {
+    readonly timestamp?: string;
+    readonly experiment?: string;
+    readonly targets?: readonly string[];
+    readonly tests_run?: readonly string[];
+  };
+  readonly run_summary?: Record<
+    string,
+    {
+      readonly pass_rate?: { readonly mean?: number };
+    }
+  >;
+};
+/** Turns a run path into a run id: `<timestamp>` for `default`, else `<experiment>::<timestamp>`. */
+function buildGitRunId(relativeRunPath: string): string {
+  const posixPath = relativeRunPath.split(path.sep).join('/');
+  const parts = posixPath.split('/').filter(Boolean);
+  if (parts.length < 2) {
+    return parts[0] ?? relativeRunPath;
+  }
+  const experimentName = parts.slice(0, -1).join('/');
+  const runTimestamp = parts.at(-1);
+  if (experimentName === 'default') {
+    return runTimestamp ?? posixPath;
+  }
+  return `${experimentName}::${runTimestamp}`;
+}
+/** Picks the experiment: benchmark metadata, else the run id before its last `::`, else `default`. */
+function getRunExperiment(runId: string, benchmark: GitRunBenchmark): string {
+  const fromMetadata = benchmark.metadata?.experiment?.trim();
+  if (fromMetadata) {
+    return fromMetadata;
+  }
+  // No usable metadata value: fall back to the `<experiment>::<timestamp>` shape of the run id.
+  const cut = runId.lastIndexOf('::');
+  return cut >= 0 ? runId.slice(0, cut) : 'default';
+}
+/** Averages the finite `pass_rate.mean` values of a run summary; `undefined` when there are none. */
+function computeAveragePassRate(runSummary: GitRunBenchmark['run_summary']): number | undefined {
+  if (!runSummary) {
+    return undefined;
+  }
+  let total = 0;
+  let counted = 0;
+  for (const entry of Object.values(runSummary)) {
+    const mean = entry.pass_rate?.mean;
+    if (typeof mean === 'number' && Number.isFinite(mean)) {
+      total += mean;
+      counted += 1;
+    }
+  }
+  return counted === 0 ? undefined : total / counted;
+}
+/**
+ * Runs `git cat-file --batch` in `repoDir` with `input` written to its stdin. Resolves with the
+ * raw stdout bytes on exit code 0; on spawn failure or a non-zero exit it rejects with an error
+ * (git's stderr, or a generic message) passed through `withFriendlyGitHubAuthError`.
+ */
+async function runGitBatch(repoDir: string, input: string): Promise<Buffer> {
+  return new Promise((resolve, reject) => {
+    const child = spawn('git', ['cat-file', '--batch'], {
+      cwd: repoDir,
+      env: getGitEnv(),
+      stdio: ['pipe', 'pipe', 'pipe'],
+    });
+    const stdoutChunks: Buffer[] = [];
+    const stderrChunks: Buffer[] = [];
+    child.stdout.on('data', (chunk: Buffer | string) => {
+      stdoutChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
+    });
+    child.stderr.on('data', (chunk: Buffer | string) => {
+      stderrChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
+    });
+    child.on('error', (error) => reject(withFriendlyGitHubAuthError(error)));
+    child.on('close', (code) => {
+      if (code === 0) {
+        resolve(Buffer.concat(stdoutChunks));
+        return;
+      }
+      const stderr = Buffer.concat(stderrChunks).toString('utf8').trim();
+      const failure = new Error(stderr.length > 0 ? stderr : 'git cat-file failed');
+      reject(withFriendlyGitHubAuthError(failure));
+    });
+    // A git process that dies before draining stdin makes this write fail (EPIPE); unhandled, that
+    // stream error is thrown as uncaught. It is ignored here: the handlers above report the failure.
+    child.stdin.on('error', () => undefined);
+    child.stdin.end(input);
+  });
+}
+/** Splits raw `git cat-file --batch` output into blobs; "missing" entries are skipped, non-blobs throw. */
+function parseGitBatchBlobs(output: Buffer): GitBatchBlob[] {
+  const blobs: GitBatchBlob[] = [];
+  let offset = 0;
+
+  while (offset < output.length) {
+    const headerEnd = output.indexOf(0x0a, offset);
+    if (headerEnd === -1) {
+      throw new Error('Malformed git cat-file output: missing header terminator');
+    }
+
+    const header = output.subarray(offset, headerEnd).toString('utf8');
+    offset = headerEnd + 1;
+    // Empty lines between records are tolerated.
+    if (header.length === 0) {
+      continue;
+    }
+    // A header ending in " missing" has no content after it, so no blob is emitted for it.
+    const missingMatch = /^(.*) missing$/.exec(header);
+    if (missingMatch) {
+      continue;
+    }
+    // Every other header must have the shape "<object> <type> <size>".
+    const headerMatch = /^(.*) (\w+) (\d+)$/.exec(header);
+    if (!headerMatch) {
+      throw new Error(`Malformed git cat-file header: ${header}`);
+    }
+
+    const [, objectRef, objectType, sizeText] = headerMatch;
+    if (objectType !== 'blob') {
+      throw new Error(`Unsupported git object type for ${objectRef}: ${objectType}`);
+    }
+    // The content is exactly <size> bytes, starting right after the header line.
+    const size = Number.parseInt(sizeText, 10);
+    const contentEnd = offset + size;
+    if (contentEnd > output.length) {
+      throw new Error(`Malformed git cat-file output for ${objectRef}: truncated blob content`);
+    }
+
+    blobs.push({
+      size,
+      content: output.subarray(offset, contentEnd),
+    });
+    offset = contentEnd;
+    // Step over the single newline that follows the content, when one is present.
+    if (offset < output.length && output[offset] === 0x0a) {
+      offset += 1;
+    }
+  }
+
+  return blobs;
+}
+/**
+ * Lists the runs stored under `runs/` at `ref`, reading each `benchmark.json` directly from the
+ * git object database of `repoDir`. Results are ordered newest timestamp first. Rejects when git
+ * fails, when a listed blob cannot be read back, or when a benchmark file is not valid JSON.
+ */
+export async function listGitRuns(repoDir: string, ref = 'origin/main'): Promise<GitListedRun[]> {
+  // `-z` makes git emit NUL-terminated, unquoted paths. Without it, paths containing non-ASCII
+  // bytes are C-quoted (core.quotePath) and would never match the `/benchmark.json` suffix below.
+  const { stdout: treeOut } = await runGit(['ls-tree', '-r', '-z', '--name-only', ref, 'runs'], {
+    cwd: repoDir,
+  });
+  const benchmarkPaths = treeOut.split('\0').filter((entry) => entry.endsWith('/benchmark.json'));
+  if (benchmarkPaths.length === 0) {
+    return [];
+  }
+
+  const batchInput = `${benchmarkPaths.map((benchmarkPath) => `${ref}:${benchmarkPath}`).join('\n')}\n`;
+  const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
+  if (blobs.length !== benchmarkPaths.length) {
+    throw new Error(
+      `Expected ${benchmarkPaths.length} git blobs but received ${blobs.length} while listing results runs`,
+    );
+  }
+
+  const runs = blobs.map((blob, index): GitListedRun => {
+    const benchmarkPath = benchmarkPaths[index];
+    const benchmark = JSON.parse(blob.content.toString('utf8')) as GitRunBenchmark;
+    const runDir = path.posix.dirname(benchmarkPath);
+    const relativeRunPath = path.posix.relative('runs', runDir);
+    const runId = buildGitRunId(relativeRunPath);
+    const timestamp = benchmark.metadata?.timestamp?.trim() || path.posix.basename(runDir);
+    const targets = benchmark.metadata?.targets ?? [];
+    const passRate = computeAveragePassRate(benchmark.run_summary);
+
+    return {
+      run_id: runId,
+      experiment: getRunExperiment(runId, benchmark),
+      timestamp,
+      ...(passRate !== undefined && { pass_rate: passRate }),
+      ...(targets.length === 1 && targets[0] ? { target: targets[0] } : {}),
+      manifest_path: path.posix.join(runDir, 'index.jsonl'),
+      benchmark_path: benchmarkPath,
+      display_name: path.posix.basename(runDir),
+      test_count: benchmark.metadata?.tests_run?.length ?? 0,
+      avg_score: 0,
+      size_bytes: blob.size,
+    };
+  });
+
+  runs.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
+  return runs;
+}
diff --git a/packages/core/src/evaluation/validation/config-validator.ts b/packages/core/src/evaluation/validation/config-validator.ts
index 5196feaf..38968f77 100644
--- a/packages/core/src/evaluation/validation/config-validator.ts
+++ b/packages/core/src/evaluation/validation/config-validator.ts
@@ -78,22 +78,48 @@ export async function validateConfigFile(filePath: string): Promise<ValidationRe
         });
       } else {
         const resultsRecord = results as Record<string, unknown>;
-        if (typeof resultsRecord.repo !== 'string' || resultsRecord.repo.trim().length === 0) {
+        if (resultsRecord.mode !== 'github') {
           errors.push({
             severity: 'error',
             filePath,
-            location: 'results.repo',
-            message: "Field 'results.repo' must be a non-empty string",
+            location: 'results.mode',
+            message: "Field 'results.mode' must be 'github'",
           });
         }
-        if (typeof resultsRecord.path !== 'string' || resultsRecord.path.trim().length === 0) {
+        if (typeof resultsRecord.repo !== 'string' || resultsRecord.repo.trim().length === 0) {
           errors.push({
             severity: 'error',
             filePath,
-            location: 'results.path',
-            message: "Field 'results.path' must be a non-empty string",
+            location: 'results.repo',
+            message: "Field 'results.repo' must be a non-empty string",
           });
         }
+        if (resultsRecord.path !== undefined) {
+          if (typeof resultsRecord.path !== 'string' || resultsRecord.path.trim().length === 0) {
+            errors.push({
+              severity: 'error',
+              filePath,
+              location: 'results.path',
+              message: "Field 'results.path' must be a non-empty string",
+            });
+          } else {
+            const p = resultsRecord.path.trim();
+            const isFilesystemPath =
+              p.startsWith('/') ||
+              p.startsWith('~/') ||
+              p.startsWith('~\\') ||
+              p === '~' ||
+              /^[A-Za-z]:[/\\]/.test(p);
+            if (!isFilesystemPath) {
+              errors.push({
+                severity: 'error',
+                filePath,
+                location: 'results.path',
+                message: `'results.path' must be an absolute or home-relative filesystem path (e.g., ~/data/agentv-results). Found: '${p}'. Remove 'path' to use the default.`,
+              });
+            }
+          }
+        }
         if (resultsRecord.auto_push !== undefined && typeof resultsRecord.auto_push !== 'boolean') {
           errors.push({
             severity: 'error',
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index aab188c8..aa43c2a9 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -61,7 +61,7 @@ export { toSnakeCaseDeep, toCamelCaseDeep } from './evaluation/case-conversion.j
 export {
   ensureResultsRepoClone,
   syncResultsRepo,
-  getResultsRepoCachePaths,
+  getResultsRepoLocalPaths,
   getResultsRepoStatus,
   normalizeResultsConfig,
   resolveResultsRepoRunsDir,
@@ -74,9 +74,11 @@ export {
   pushResultsRepoBranch,
   createDraftResultsPr,
   directPushResults,
+  listGitRuns,
   type CheckedOutResultsRepoBranch,
+  type GitListedRun,
   type PreparedResultsRepoBranch,
-  type ResultsRepoCachePaths,
+  type ResultsRepoLocalPaths,
   type ResultsRepoStatus,
 } from './evaluation/results-repo.js';
 export {
diff --git a/packages/core/test/evaluation/evaluate-programmatic-api.test.ts b/packages/core/test/evaluation/evaluate-programmatic-api.test.ts
index b8d32524..6918f56e 100644
--- a/packages/core/test/evaluation/evaluate-programmatic-api.test.ts
+++ b/packages/core/test/evaluation/evaluate-programmatic-api.test.ts
@@ -9,245 +9,295 @@ import { describe, expect, it } from 'bun:test';
 import path from 'node:path';
 import { evaluate } from '../../src/evaluation/evaluate.js';
 
+const PROGRAMMATIC_API_TIMEOUT_MS = 15_000;
+
 describe('evaluate() — programmatic API extensions', () => {
   // ---------------------------------------------------------------------------
   // budgetUsd
   // ---------------------------------------------------------------------------
 
-  it('accepts budgetUsd and passes it to the orchestrator', async () => {
-    const { summary } = await evaluate({
-      tests: [
-        {
-          id: 'budget-test',
-          input: 'hello',
-          assert: [{ type: 'contains', value: 'hello' }],
-        },
-      ],
-      target: { name: 'default', provider: 'mock', response: 'hello world' },
-      budgetUsd: 10.0,
-    });
-    expect(summary.passed).toBe(1);
-  });
+  it(
+    'accepts budgetUsd and passes it to the orchestrator',
+    async () => {
+      const { summary } = await evaluate({
+        tests: [
+          {
+            id: 'budget-test',
+            input: 'hello',
+            assert: [{ type: 'contains', value: 'hello' }],
+          },
+        ],
+        target: { name: 'default', provider: 'mock', response: 'hello world' },
+        budgetUsd: 10.0,
+      });
+      expect(summary.passed).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
 
   // ---------------------------------------------------------------------------
   // turns + mode: 'conversation'
   // ---------------------------------------------------------------------------
 
-  it('accepts turns with explicit conversation mode', async () => {
-    const { summary, results } = await evaluate({
-      tests: [
-        {
-          id: 'conversation-explicit',
-          mode: 'conversation',
-          turns: [
-            {
-              input: 'Hello',
-              assert: [{ type: 'contains', value: 'mock' }],
-            },
-            {
-              input: 'How are you?',
-              assert: [{ type: 'contains', value: 'mock' }],
-            },
-          ],
-        },
-      ],
-      target: { name: 'default', provider: 'mock', response: 'mock response' },
-    });
-    expect(summary.total).toBe(1);
-    expect(results.length).toBe(1);
-  });
-
-  it('infers conversation mode when turns[] is provided without explicit mode', async () => {
-    const { summary } = await evaluate({
-      tests: [
-        {
-          id: 'conversation-inferred',
-          turns: [
-            {
-              input: 'First turn',
-              assert: [{ type: 'contains', value: 'mock' }],
-            },
-          ],
-        },
-      ],
-      target: { name: 'default', provider: 'mock', response: 'mock response' },
-    });
-    expect(summary.total).toBe(1);
-  });
-
-  it('supports expectedOutput on individual turns', async () => {
-    const { summary } = await evaluate({
-      tests: [
-        {
-          id: 'turn-expected-output',
-          turns: [
-            {
-              input: 'Say hello',
-              expectedOutput: 'Hello!',
-              assert: [{ type: 'contains', value: 'mock' }],
-            },
-          ],
-        },
-      ],
-      target: { name: 'default', provider: 'mock', response: 'mock response' },
-    });
-    expect(summary.total).toBe(1);
-  });
-
-  it('supports message array input in turns', async () => {
-    const { summary } = await evaluate({
-      tests: [
-        {
-          id: 'turn-message-array',
-          turns: [
-            {
-              input: [
-                { role: 'system', content: 'You are helpful' },
-                { role: 'user', content: 'Hello' },
-              ],
-              assert: [{ type: 'contains', value: 'mock' }],
-            },
-          ],
-        },
-      ],
-      target: { name: 'default', provider: 'mock', response: 'mock response' },
-    });
-    expect(summary.total).toBe(1);
-  });
+  it(
+    'accepts turns with explicit conversation mode',
+    async () => {
+      const { summary, results } = await evaluate({
+        tests: [
+          {
+            id: 'conversation-explicit',
+            mode: 'conversation',
+            turns: [
+              {
+                input: 'Hello',
+                assert: [{ type: 'contains', value: 'mock' }],
+              },
+              {
+                input: 'How are you?',
+                assert: [{ type: 'contains', value: 'mock' }],
+              },
+            ],
+          },
+        ],
+        target: { name: 'default', provider: 'mock', response: 'mock response' },
+      });
+      expect(summary.total).toBe(1);
+      expect(results.length).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
+
+  it(
+    'infers conversation mode when turns[] is provided without explicit mode',
+    async () => {
+      const { summary } = await evaluate({
+        tests: [
+          {
+            id: 'conversation-inferred',
+            turns: [
+              {
+                input: 'First turn',
+                assert: [{ type: 'contains', value: 'mock' }],
+              },
+            ],
+          },
+        ],
+        target: { name: 'default', provider: 'mock', response: 'mock response' },
+      });
+      expect(summary.total).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
+
+  it(
+    'supports expectedOutput on individual turns',
+    async () => {
+      const { summary } = await evaluate({
+        tests: [
+          {
+            id: 'turn-expected-output',
+            turns: [
+              {
+                input: 'Say hello',
+                expectedOutput: 'Hello!',
+                assert: [{ type: 'contains', value: 'mock' }],
+              },
+            ],
+          },
+        ],
+        target: { name: 'default', provider: 'mock', response: 'mock response' },
+      });
+      expect(summary.total).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
+
+  it(
+    'supports message array input in turns',
+    async () => {
+      const { summary } = await evaluate({
+        tests: [
+          {
+            id: 'turn-message-array',
+            turns: [
+              {
+                input: [
+                  { role: 'system', content: 'You are helpful' },
+                  { role: 'user', content: 'Hello' },
+                ],
+                assert: [{ type: 'contains', value: 'mock' }],
+              },
+            ],
+          },
+        ],
+        target: { name: 'default', provider: 'mock', response: 'mock response' },
+      });
+      expect(summary.total).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
 
   // ---------------------------------------------------------------------------
   // aggregation
   // ---------------------------------------------------------------------------
 
-  it('accepts aggregation on conversation tests', async () => {
-    const { summary } = await evaluate({
-      tests: [
-        {
-          id: 'aggregation-min',
-          turns: [
-            {
-              input: 'Turn 1',
-              assert: [{ type: 'contains', value: 'mock' }],
-            },
-            {
-              input: 'Turn 2',
-              assert: [{ type: 'contains', value: 'mock' }],
-            },
-          ],
-          aggregation: 'min',
-        },
-      ],
-      target: { name: 'default', provider: 'mock', response: 'mock response' },
-    });
-    expect(summary.total).toBe(1);
-  });
+  it(
+    'accepts aggregation on conversation tests',
+    async () => {
+      const { summary } = await evaluate({
+        tests: [
+          {
+            id: 'aggregation-min',
+            turns: [
+              {
+                input: 'Turn 1',
+                assert: [{ type: 'contains', value: 'mock' }],
+              },
+              {
+                input: 'Turn 2',
+                assert: [{ type: 'contains', value: 'mock' }],
+              },
+            ],
+            aggregation: 'min',
+          },
+        ],
+        target: { name: 'default', provider: 'mock', response: 'mock response' },
+      });
+      expect(summary.total).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
 
   // ---------------------------------------------------------------------------
   // beforeAll
   // ---------------------------------------------------------------------------
 
-  it('accepts beforeAll as a string', async () => {
-    // beforeAll requires a workspace to execute in; without repos it just attaches
-    // the hook config. This test verifies the type is accepted without throwing.
-    const { summary } = await evaluate({
-      tests: [
-        {
-          id: 'before-all-string',
-          input: 'hello',
-          assert: [{ type: 'contains', value: 'test' }],
-        },
-      ],
-      target: { name: 'default', provider: 'mock', response: 'test output' },
-      beforeAll: 'echo "setup complete"',
-    });
-    expect(summary.total).toBe(1);
-  });
-
-  it('accepts beforeAll as a string array', async () => {
-    const { summary } = await evaluate({
-      tests: [
-        {
-          id: 'before-all-array',
-          input: 'hello',
-          assert: [{ type: 'contains', value: 'test' }],
-        },
-      ],
-      target: { name: 'default', provider: 'mock', response: 'test output' },
-      beforeAll: ['echo', 'setup complete'],
-    });
-    expect(summary.total).toBe(1);
-  });
+  it(
+    'accepts beforeAll as a string',
+    async () => {
+      // beforeAll requires a workspace to execute in; without repos it just attaches
+      // the hook config. This test verifies the type is accepted without throwing.
+      const { summary } = await evaluate({
+        tests: [
+          {
+            id: 'before-all-string',
+            input: 'hello',
+            assert: [{ type: 'contains', value: 'test' }],
+          },
+        ],
+        target: { name: 'default', provider: 'mock', response: 'test output' },
+        beforeAll: 'echo "setup complete"',
+      });
+      expect(summary.total).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
+
+  it(
+    'accepts beforeAll as a string array',
+    async () => {
+      const { summary } = await evaluate({
+        tests: [
+          {
+            id: 'before-all-array',
+            input: 'hello',
+            assert: [{ type: 'contains', value: 'test' }],
+          },
+        ],
+        target: { name: 'default', provider: 'mock', response: 'test output' },
+        beforeAll: ['echo', 'setup complete'],
+      });
+      expect(summary.total).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
 
   // ---------------------------------------------------------------------------
   // Combined usage
   // ---------------------------------------------------------------------------
 
-  it('supports all new fields together', async () => {
-    const { summary } = await evaluate({
-      tests: [
-        {
-          id: 'combined-test',
-          turns: [
-            {
-              input: 'Hello',
-              expectedOutput: 'Hi there',
-              assert: [{ type: 'contains', value: 'mock' }],
-            },
-            {
-              input: 'Goodbye',
-              assert: [{ type: 'contains', value: 'mock' }],
-            },
-          ],
-          aggregation: 'mean',
-        },
-      ],
-      target: { name: 'default', provider: 'mock', response: 'mock response' },
-      budgetUsd: 5.0,
-      beforeAll: 'echo "setup"',
-    });
-    expect(summary.total).toBe(1);
-  });
+  it(
+    'supports all new fields together',
+    async () => {
+      const { summary } = await evaluate({
+        tests: [
+          {
+            id: 'combined-test',
+            turns: [
+              {
+                input: 'Hello',
+                expectedOutput: 'Hi there',
+                assert: [{ type: 'contains', value: 'mock' }],
+              },
+              {
+                input: 'Goodbye',
+                assert: [{ type: 'contains', value: 'mock' }],
+              },
+            ],
+            aggregation: 'mean',
+          },
+        ],
+        target: { name: 'default', provider: 'mock', response: 'mock response' },
+        budgetUsd: 5.0,
+        beforeAll: 'echo "setup"',
+      });
+      expect(summary.total).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
 
   // ---------------------------------------------------------------------------
   // Backwards compatibility: input still works as before
   // ---------------------------------------------------------------------------
 
-  it('still works with standard single-turn input', async () => {
-    const { summary } = await evaluate({
-      tests: [
-        {
-          id: 'standard-input',
-          input: 'hello',
-          assert: [{ type: 'contains', value: 'hello' }],
-        },
-      ],
-      target: { name: 'default', provider: 'mock', response: 'hello world' },
-    });
-    expect(summary.passed).toBe(1);
-  });
-
-  it('uses inline target from a TypeScript specFile', async () => {
-    const specFile = path.join(import.meta.dir, 'loaders', 'fixtures', 'default-export.eval.ts');
-
-    const { summary } = await evaluate({
-      specFile,
-    });
-
-    expect(summary.total).toBe(1);
-    expect(summary.passed).toBe(1);
-  });
+  it(
+    'still works with standard single-turn input',
+    async () => {
+      const { summary } = await evaluate({
+        tests: [
+          {
+            id: 'standard-input',
+            input: 'hello',
+            assert: [{ type: 'contains', value: 'hello' }],
+          },
+        ],
+        target: { name: 'default', provider: 'mock', response: 'hello world' },
+      });
+      expect(summary.passed).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
+
+  it(
+    'uses inline target from a TypeScript specFile',
+    async () => {
+      const specFile = path.join(import.meta.dir, 'loaders', 'fixtures', 'default-export.eval.ts');
+
+      const { summary } = await evaluate({
+        specFile,
+      });
+
+      expect(summary.total).toBe(1);
+      expect(summary.passed).toBe(1);
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
 
   // ---------------------------------------------------------------------------
   // Validation
   // ---------------------------------------------------------------------------
 
-  it('throws when input is missing on a non-conversation test', async () => {
-    expect(() =>
-      evaluate({
-        // biome-ignore lint/suspicious/noExplicitAny: intentionally testing invalid input
-        tests: [{ id: 'no-input', assert: [{ type: 'contains', value: 'x' }] } as any],
-        target: { name: 'default', provider: 'mock', response: 'hello' },
-      }),
-    ).toThrow("Test 'no-input': input is required for non-conversation tests");
-  });
+  it(
+    'throws when input is missing on a non-conversation test',
+    async () => {
+      expect(() =>
+        evaluate({
+          // biome-ignore lint/suspicious/noExplicitAny: intentionally testing invalid input
+          tests: [{ id: 'no-input', assert: [{ type: 'contains', value: 'x' }] } as any],
+          target: { name: 'default', provider: 'mock', response: 'hello' },
+        }),
+      ).toThrow("Test 'no-input': input is required for non-conversation tests");
+    },
+    PROGRAMMATIC_API_TIMEOUT_MS,
+  );
 });
diff --git a/packages/core/test/evaluation/loaders/config-loader.test.ts b/packages/core/test/evaluation/loaders/config-loader.test.ts
index 3846b471..e97b03a4 100644
--- a/packages/core/test/evaluation/loaders/config-loader.test.ts
+++ b/packages/core/test/evaluation/loaders/config-loader.test.ts
@@ -137,11 +137,12 @@ describe('extractTrialsConfig', () => {
 });
 
 describe('parseResultsConfig', () => {
-  it('parses valid results config', () => {
+  it('parses valid results config with explicit path', () => {
     const result = parseResultsConfig(
       {
+        mode: 'github',
         repo: 'EntityProcess/agentv-evals',
-        path: 'autopilot-dev/runs',
+        path: '~/data/agentv-results',
         auto_push: true,
         branch_prefix: 'eval-results',
       },
@@ -149,18 +150,83 @@ describe('parseResultsConfig', () => {
     );
 
     expect(result).toEqual({
+      mode: 'github',
       repo: 'EntityProcess/agentv-evals',
-      path: 'autopilot-dev/runs',
+      path: '~/data/agentv-results',
       auto_push: true,
       branch_prefix: 'eval-results',
     });
   });
 
+  it('parses valid results config without path (defaults omitted)', () => {
+    const result = parseResultsConfig(
+      {
+        mode: 'github',
+        repo: 'EntityProcess/agentv-evals',
+      },
+      '/tmp/.agentv/config.yaml',
+    );
+
+    expect(result).toEqual({
+      mode: 'github',
+      repo: 'EntityProcess/agentv-evals',
+    });
+  });
+
+  it('returns undefined when mode is missing', () => {
+    const result = parseResultsConfig(
+      {
+        repo: 'EntityProcess/agentv-evals',
+      },
+      '/tmp/.agentv/config.yaml',
+    );
+
+    expect(result).toBeUndefined();
+  });
+
+  it('returns undefined when mode is not github', () => {
+    const result = parseResultsConfig(
+      {
+        mode: 'other',
+        repo: 'EntityProcess/agentv-evals',
+      },
+      '/tmp/.agentv/config.yaml',
+    );
+
+    expect(result).toBeUndefined();
+  });
+
+  it('returns undefined when path looks like a repo subdirectory', () => {
+    const result = parseResultsConfig(
+      {
+        mode: 'github',
+        repo: 'EntityProcess/agentv-evals',
+        path: 'autopilot-dev/runs',
+      },
+      '/tmp/.agentv/config.yaml',
+    );
+
+    expect(result).toBeUndefined();
+  });
+
+  it('accepts absolute path', () => {
+    const result = parseResultsConfig(
+      {
+        mode: 'github',
+        repo: 'EntityProcess/agentv-evals',
+        path: '/home/user/data/results',
+      },
+      '/tmp/.agentv/config.yaml',
+    );
+
+    expect(result?.path).toBe('/home/user/data/results');
+  });
+
   it('returns undefined when repo is empty', () => {
     const result = parseResultsConfig(
       {
+        mode: 'github',
         repo: '',
-        path: 'autopilot-dev/runs',
       },
       '/tmp/.agentv/config.yaml',
     );
@@ -171,8 +237,8 @@ describe('parseResultsConfig', () => {
   it('returns undefined when repo is not a string', () => {
     const result = parseResultsConfig(
       {
+        mode: 'github',
         repo: 123,
-        path: 'autopilot-dev/runs',
       },
       '/tmp/.agentv/config.yaml',
     );
diff --git a/packages/core/test/evaluation/orchestrator.test.ts b/packages/core/test/evaluation/orchestrator.test.ts
index daac1ee1..d4cc49e9 100644
--- a/packages/core/test/evaluation/orchestrator.test.ts
+++ b/packages/core/test/evaluation/orchestrator.test.ts
@@ -3082,9 +3082,13 @@ fs.writeFileSync(path.join(payload.workspace_path, 'hook.txt'), payload.test_id
       responses: [{ output: [{ role: 'assistant', content: [{ type: 'text', text: 'answer' }] }] }],
     });
 
-    // Use YAML workspace.path (not CLI --workspace) with type: git repos.
-    // repo-a exists → should be reused. repo-b is missing but uses a fake URL → should fail clone.
-    // Since repo-a is reused (skipped) and repo-b clone fails, this proves per-repo logic works.
+    const missingRepoBSource = path.join(testDir, 'missing-repo-b-source');
+
+    // Use YAML workspace.path (not CLI --workspace) with mixed repo states.
+    // repo-a exists → should be reused. repo-b is missing and points to a missing local source
+    // → should fail immediately. Since repo-a is reused (skipped) and repo-b materialization
+    // fails fast, this proves the per-repo existence check works without depending on network
+    // timeouts from cloning fake remotes.
     const evalCase: EvalTest = {
       ...baseTestCase,
       workspace: {
@@ -3098,15 +3102,14 @@ fs.writeFileSync(path.join(payload.workspace_path, 'hook.txt'), payload.test_id
           },
           {
             path: 'repo-b',
-            source: { type: 'git', url: 'https://github.com/example/repo-b.git' },
-            checkout: { ref: 'main' },
+            source: { type: 'local', path: missingRepoBSource },
           },
         ],
       },
     };
 
-    // repo-b clone will fail (fake URL), which proves repo-a was skipped (per-repo check)
-    // and only repo-b was attempted
+    // repo-b materialization fails immediately, which proves repo-a was skipped
+    // and only repo-b was attempted.
     await expect(
       runEvaluation({
         testFilePath: 'in-memory.yaml',
@@ -3117,7 +3120,7 @@ fs.writeFileSync(path.join(payload.workspace_path, 'hook.txt'), payload.test_id
         evalCases: [evalCase],
         keepWorkspaces: true,
       }),
-    ).rejects.toThrow('Failed to materialize repos');
+    ).rejects.toThrow('Local repo path validation failed');
 
     // repo-a marker should still exist (not deleted by static workspace cleanup)
     await fsAccess(path.join(repoADir, 'marker.txt'));
diff --git a/packages/core/test/evaluation/results-repo.test.ts b/packages/core/test/evaluation/results-repo.test.ts
new file mode 100644
index 00000000..211f2e98
--- /dev/null
+++ b/packages/core/test/evaluation/results-repo.test.ts
@@ -0,0 +1,294 @@
+import { execSync } from 'node:child_process';
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+
+import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
+
+import type { ResultsConfig } from '../../src/evaluation/loaders/config-loader.js';
+import {
+  directPushResults,
+  ensureResultsRepoClone,
+  listGitRuns,
+  syncResultsRepo,
+} from '../../src/evaluation/results-repo.js';
+
+/** Copies the defined entries of process.env, dropping GIT_* variables except GIT_SSH_COMMAND. */
+function cleanGitEnv(): Record<string, string> {
+  const sanitized: Record<string, string> = {};
+  for (const [name, setting] of Object.entries(process.env)) {
+    const isGitOverride = name.startsWith('GIT_') && name !== 'GIT_SSH_COMMAND';
+    if (setting !== undefined && !isGitOverride) sanitized[name] = setting;
+  }
+  return sanitized;
+}
+
+/** Runs `cmd` via execSync in `cwd` with the env from cleanGitEnv(); returns trimmed stdout. */
+function git(cmd: string, cwd: string): string {
+  const stdout = execSync(cmd, {
+    cwd,
+    env: cleanGitEnv(),
+    stdio: ['ignore', 'pipe', 'pipe'],
+  });
+  return stdout.toString().trim();
+}
+
+/**
+ * Builds a github-mode results config: `repo` is a file:// URL for `repoDir`,
+ * `path` is `cloneDir`, and auto_push is enabled.
+ */
+function createResultsConfig(repoDir: string, cloneDir: string): ResultsConfig {
+  const repo = `file://${repoDir}`;
+  return { mode: 'github', repo, path: cloneDir, auto_push: true };
+}
+
+/** Creates a bare remote (initial branch main) and a seed clone that pushes one README commit. */
+function initializeRemoteRepo(rootDir: string): { remoteDir: string; seedDir: string } {
+  const remoteDir = path.join(rootDir, 'results-remote.git');
+  const seedDir = path.join(rootDir, 'results-seed');
+  git(`git init --bare --initial-branch=main --quiet "${remoteDir}"`, rootDir);
+  git(`git clone --quiet "${remoteDir}" "${seedDir}"`, rootDir);
+  // Commit identity is configured in the seed clone before its first commit.
+  git('git config user.email "test@example.com"', seedDir);
+  git('git config user.name "Test User"', seedDir);
+  writeFileSync(path.join(seedDir, 'README.md'), '# results repo\n');
+  git('git add README.md && git commit --quiet -m "seed repo"', seedDir);
+  git('git push --quiet origin main', seedDir);
+  return { remoteDir, seedDir };
+}
+
+/**
+ * Creates `runDir` and fills it with a minimal run: an index.jsonl holding one
+ * test row and a pretty-printed benchmark.json for a single gpt-4o target.
+ */
+function writeRunArtifacts(runDir: string, experiment: string, timestamp: string): void {
+  const benchmark = {
+    metadata: {
+      timestamp,
+      experiment,
+      targets: ['gpt-4o'],
+      tests_run: ['alpha'],
+    },
+    run_summary: {
+      'gpt-4o': {
+        pass_rate: { mean: 1 },
+      },
+    },
+  };
+
+  mkdirSync(runDir, { recursive: true });
+  writeFileSync(path.join(runDir, 'index.jsonl'), '{"test_id":"alpha"}\n');
+  const benchmarkPath = path.join(runDir, 'benchmark.json');
+  writeFileSync(benchmarkPath, JSON.stringify(benchmark, null, 2));
+}
+
+// Exercises listGitRuns against a throwaway git repository that is created
+// before each test and removed afterwards.
+describe('listGitRuns', () => {
+  let repoDir: string;
+
+  beforeEach(() => {
+    repoDir = mkdtempSync(path.join(os.tmpdir(), 'agentv-results-repo-test-'));
+    git('git init', repoDir);
+    git('git config user.email "test@example.com"', repoDir);
+    git('git config user.name "Test User"', repoDir);
+  });
+
+  afterEach(() => {
+    rmSync(repoDir, { recursive: true, force: true });
+  });
+
+  it('returns committed runs derived from benchmark.json blobs', async () => {
+    // Only benchmark.json is committed for each run; no index.jsonl is written here.
+    const defaultRunDir = path.join(repoDir, 'runs', 'default', '2026-05-20T10-00-00-000Z');
+    mkdirSync(defaultRunDir, { recursive: true });
+    writeFileSync(
+      path.join(defaultRunDir, 'benchmark.json'),
+      JSON.stringify(
+        {
+          metadata: {
+            timestamp: '2026-05-20T10:00:00.000Z',
+            targets: ['gpt-4o'],
+            tests_run: ['alpha', 'beta'],
+          },
+          run_summary: {
+            'gpt-4o': { pass_rate: { mean: 0.5 } },
+          },
+        },
+        null,
+        2,
+      ),
+    );
+
+    const experimentRunDir = path.join(repoDir, 'runs', 'with-skills', '2026-05-21T11-00-00-000Z');
+    mkdirSync(experimentRunDir, { recursive: true });
+    writeFileSync(
+      path.join(experimentRunDir, 'benchmark.json'),
+      JSON.stringify(
+        {
+          metadata: {
+            timestamp: '2026-05-21T11:00:00.000Z',
+            experiment: 'with-skills',
+            targets: ['claude-sonnet', 'gpt-4o'],
+            tests_run: ['alpha', 'beta', 'gamma'],
+          },
+          run_summary: {
+            'claude-sonnet': { pass_rate: { mean: 1 } },
+            'gpt-4o': { pass_rate: { mean: 0.5 } },
+          },
+        },
+        null,
+        2,
+      ),
+    );
+
+    git('git add runs && git commit -m "seed runs"', repoDir);
+
+    const runs = await listGitRuns(repoDir, 'HEAD');
+
+    // The later-timestamped run is expected first; the default experiment has no id prefix.
+    expect(runs).toHaveLength(2);
+    expect(runs.map((run) => run.run_id)).toEqual([
+      'with-skills::2026-05-21T11-00-00-000Z',
+      '2026-05-20T10-00-00-000Z',
+    ]);
+    // Two targets: pass_rate is the mean of 1 and 0.5, and no single target is reported.
+    expect(runs[0]).toMatchObject({
+      experiment: 'with-skills',
+      timestamp: '2026-05-21T11:00:00.000Z',
+      display_name: '2026-05-21T11-00-00-000Z',
+      manifest_path: 'runs/with-skills/2026-05-21T11-00-00-000Z/index.jsonl',
+      benchmark_path: 'runs/with-skills/2026-05-21T11-00-00-000Z/benchmark.json',
+      test_count: 3,
+      pass_rate: 0.75,
+      avg_score: 0,
+    });
+    expect(runs[0].target).toBeUndefined();
+    expect(runs[1]).toMatchObject({
+      experiment: 'default',
+      target: 'gpt-4o',
+      manifest_path: 'runs/default/2026-05-20T10-00-00-000Z/index.jsonl',
+      test_count: 2,
+      pass_rate: 0.5,
+    });
+    expect(runs[0].size_bytes).toBeGreaterThan(0);
+  });
+
+  it('returns an empty list when the ref has no committed runs', async () => {
+    writeFileSync(path.join(repoDir, 'README.md'), '# test\n');
+    git('git add README.md && git commit -m "initial"', repoDir);
+
+    await expect(listGitRuns(repoDir, 'HEAD')).resolves.toEqual([]);
+  });
+
+  it('ignores inherited git hook environment variables', async () => {
+    const runDir = path.join(repoDir, 'runs', 'default', '2026-05-20T10-00-00-000Z');
+    mkdirSync(runDir, { recursive: true });
+    writeFileSync(
+      path.join(runDir, 'benchmark.json'),
+      JSON.stringify(
+        {
+          metadata: {
+            timestamp: '2026-05-20T10:00:00.000Z',
+            targets: ['gpt-4o'],
+            tests_run: ['alpha'],
+          },
+          run_summary: {
+            'gpt-4o': { pass_rate: { mean: 1 } },
+          },
+        },
+        null,
+        2,
+      ),
+    );
+    git('git add runs && git commit -m "seed run"', repoDir);
+
+    // Point git at a bogus repository through the process environment, as a hook would.
+    const previousGitDir = process.env.GIT_DIR;
+    const previousGitWorkTree = process.env.GIT_WORK_TREE;
+    process.env.GIT_DIR = '/tmp/not-the-test-repo';
+    process.env.GIT_WORK_TREE = '/tmp/not-the-test-repo';
+
+    try {
+      const runs = await listGitRuns(repoDir, 'HEAD');
+      expect(runs).toHaveLength(1);
+      expect(runs[0].run_id).toBe('2026-05-20T10-00-00-000Z');
+    } finally {
+      // Delete a variable that was originally unset: process.env coerces assigned
+      // values to strings, so assigning undefined would leave it set to "undefined".
+      if (previousGitDir === undefined) {
+        Reflect.deleteProperty(process.env, 'GIT_DIR');
+      } else {
+        process.env.GIT_DIR = previousGitDir;
+      }
+
+      if (previousGitWorkTree === undefined) {
+        Reflect.deleteProperty(process.env, 'GIT_WORK_TREE');
+      } else {
+        process.env.GIT_WORK_TREE = previousGitWorkTree;
+      }
+    }
+  });
+});
+
+// Integration tests that push and sync against a local bare remote created per test.
+describe('results repo write path', () => {
+  let rootDir: string;
+
+  beforeEach(() => {
+    rootDir = mkdtempSync(path.join(os.tmpdir(), 'agentv-results-repo-write-test-'));
+  });
+
+  afterEach(() => {
+    rmSync(rootDir, { recursive: true, force: true });
+  });
+
+  it('commits pushed runs into the configured clone with an Agentv-Run trailer', async () => {
+    const runTimestamp = '2026-05-22T10-00-00-000Z';
+    const expectedRunId = `with-skills::${runTimestamp}`;
+    const expectedTrailer = `Agentv-Run: ${expectedRunId}`;
+    const { remoteDir } = initializeRemoteRepo(rootDir);
+    const cloneDir = path.join(rootDir, 'results-clone');
+    const sourceDir = path.join(rootDir, 'source-run');
+    writeRunArtifacts(sourceDir, 'with-skills', '2026-05-22T10:00:00.000Z');
+
+    const didPush = await directPushResults({
+      config: createResultsConfig(remoteDir, cloneDir),
+      sourceDir,
+      destinationPath: path.join('with-skills', runTimestamp),
+      commitMessage: 'feat(results): with-skills - 1/1 PASS (1.000)',
+    });
+
+    expect(didPush).toBe(true);
+    expect(git('git rev-parse --show-toplevel', cloneDir)).toBe(cloneDir);
+    // The trailer must be on the newest commit of both the clone and the remote's main.
+    expect(git('git log -1 --pretty=%B', cloneDir)).toContain(expectedTrailer);
+    const remoteMessage = git(`git --git-dir "${remoteDir}" log -1 --pretty=%B main`, rootDir);
+    expect(remoteMessage).toContain(expectedTrailer);
+    const listedRuns = await listGitRuns(cloneDir, 'HEAD');
+    expect(listedRuns).toHaveLength(1);
+    expect(listedRuns[0].run_id).toBe(expectedRunId);
+  }, 20000);
+
+  it('syncResultsRepo refreshes refs without checking out the base branch', async () => {
+    const { remoteDir, seedDir } = initializeRemoteRepo(rootDir);
+    const cloneDir = path.join(rootDir, 'results-clone');
+    const config = createResultsConfig(remoteDir, cloneDir);
+
+    await ensureResultsRepoClone(config);
+    git('git config user.email "test@example.com"', cloneDir);
+    git('git config user.name "Test User"', cloneDir);
+    git('git checkout -b scratch', cloneDir);
+
+    // Advance the remote's main by one commit after the clone already exists.
+    writeFileSync(path.join(seedDir, 'CHANGELOG.md'), 'remote update\n');
+    git('git add CHANGELOG.md && git commit --quiet -m "remote update"', seedDir);
+    git('git push --quiet origin main', seedDir);
+    const remoteTip = git(`git --git-dir "${remoteDir}" rev-parse main`, rootDir);
+
+    await syncResultsRepo(config);
+
+    expect(git('git branch --show-current', cloneDir)).toBe('scratch');
+    expect(git('git rev-parse origin/main', cloneDir)).toBe(remoteTip);
+  }, 20000);
+});
diff --git a/packages/core/test/evaluation/validation/config-validator.test.ts b/packages/core/test/evaluation/validation/config-validator.test.ts
index f2adaeef..7aa41b91 100644
--- a/packages/core/test/evaluation/validation/config-validator.test.ts
+++ b/packages/core/test/evaluation/validation/config-validator.test.ts
@@ -51,8 +51,8 @@ describe('validateConfigFile', () => {
     await writeFile(
       filePath,
       `results:
+  mode: github
   repo: EntityProcess/agentv-evals
-  path: autopilot-dev/runs
   auto_push: true
   branch_prefix: eval-results
 `,
@@ -64,6 +64,42 @@ describe('validateConfigFile', () => {
     expect(result.errors).toHaveLength(0);
   });
 
+  // A results block that omits `mode` must produce exactly one
+  // error-severity entry located at `results.mode`.
+  it('errors on missing results.mode', async () => {
+    const configPath = path.join(tempDir, 'config-results-no-mode.yaml');
+    const yaml = `results:
+  repo: EntityProcess/agentv-evals
+`;
+    await writeFile(configPath, yaml);
+
+    const { errors } = await validateConfigFile(configPath);
+
+    const modeErrors = errors.filter(
+      (issue) => issue.location === 'results.mode' && issue.severity === 'error',
+    );
+    expect(modeErrors).toHaveLength(1);
+  });
+
+  // An old-style subdirectory value for `results.path` must produce exactly
+  // one error-severity entry located at `results.path`.
+  it('errors on old-style subdirectory path', async () => {
+    const configPath = path.join(tempDir, 'config-results-old-path.yaml');
+    const yaml = `results:
+  mode: github
+  repo: EntityProcess/agentv-evals
+  path: autopilot-dev/runs
+`;
+    await writeFile(configPath, yaml);
+
+    const { errors } = await validateConfigFile(configPath);
+
+    const pathErrors = errors.filter(
+      (issue) => issue.location === 'results.path' && issue.severity === 'error',
+    );
+    expect(pathErrors).toHaveLength(1);
+  });
+
   it('errors on invalid required_version type', async () => {
     const filePath = path.join(tempDir, 'config-bad-version.yaml');
     await writeFile(filePath, 'required_version: 3\n');