diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..0a61fa6
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,44 @@
+# Continuous integration: lint, typecheck, test and build on every push to
+# main and on every pull request that targets main.
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Least privilege: every step below only reads the repository, so the
+# GITHUB_TOKEN is restricted to read-only contents access.
+permissions:
+  contents: read
+
+jobs:
+  ci:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # No `version` input is given; the pnpm version is expected to come from
+      # the `packageManager` field in package.json.
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: pnpm
+
+      - name: Install deps
+        run: pnpm install --frozen-lockfile
+
+      - name: Lint (biome)
+        run: pnpm run lint
+
+      - name: Typecheck
+        run: pnpm run typecheck
+
+      - name: Test
+        run: pnpm run test
+
+      - name: Build
+        run: pnpm run build
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..e59070d
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,97 @@
+# Release pipeline: re-run the CI gate, check that the pushed tag matches the
+# package.json version, then publish to npm. Only refs under refs/tags/v*
+# publish; a manual dispatch from a branch runs `verify` alone.
+name: Publish
+
+on:
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch:
+
+# Least-privilege default for the GITHUB_TOKEN; publish-npm declares its own
+# permissions block below.
+permissions:
+  contents: read
+
+jobs:
+  verify:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: pnpm
+          registry-url: https://registry.npmjs.org
+
+      - name: Install deps
+        run: pnpm install --frozen-lockfile
+
+      - name: Lint (biome)
+        run: pnpm run lint
+
+      - name: Typecheck
+        run: pnpm run typecheck
+
+      - name: Test
+        run: pnpm run test
+
+      - name: Build
+        run: pnpm run build
+
+      # On tag builds the tag (minus its leading "v") must equal the
+      # package.json version; on any other ref the comparison is skipped.
+      - name: Verify tag/version lock
+        run: |
+          NPM_VERSION=$(node -p "require('./package.json').version")
+          if [[ "${GITHUB_REF:-}" == refs/tags/v* ]]; then
+            TAG_VERSION="${GITHUB_REF#refs/tags/v}"
+            if [ "$TAG_VERSION" != "$NPM_VERSION" ]; then
+              echo "::error::Tag/version mismatch: tag=$TAG_VERSION package=$NPM_VERSION."
+              exit 1
+            fi
+          fi
+          echo "Version locked: $NPM_VERSION"
+
+  publish-npm:
+    needs: verify
+    if: startsWith(github.ref, 'refs/tags/v')
+    runs-on: ubuntu-latest
+    # NOTE(review): id-token: write is only needed for OIDC-based features such
+    # as npm provenance, which no step here requests yet - confirm the intent.
+    permissions:
+      contents: read
+      id-token: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: pnpm
+          registry-url: https://registry.npmjs.org
+
+      - run: pnpm install --frozen-lockfile
+      - run: pnpm run build
+
+      # Idempotent: re-running a tag whose npm version is already published
+      # must not fail the workflow. The check keys off the output of `npm view`
+      # rather than its exit status, so an empty result never counts as published.
+      - name: Publish to npm (skip if already published)
+        run: |
+          NAME=$(node -p "require('./package.json').name")
+          VERSION=$(node -p "require('./package.json').version")
+          PUBLISHED=$(npm view "$NAME@$VERSION" version 2>/dev/null || true)
+          if [ -n "$PUBLISHED" ]; then
+            echo "$NAME@$VERSION already on registry; skipping publish"
+          else
+            pnpm publish --no-git-checks --access public
+          fi
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
diff --git a/README.md b/README.md
index cfd0640..52baedd 100644
--- a/README.md
+++ b/README.md
@@ -1,54 +1,32 @@
-# agent-runtime
-
-Reusable runtime lifecycle for domain-specific agents. Standardizes the
-task lifecycle (knowledge readiness → questions/acquisition → control loop
-→ eval) and delegates domain behavior to an adapter. Owns no domain
-policy, models, tools, connectors, or UI.
-
-## Contents
-
-- [Overview](#overview)
-- [Install](#install)
-- [Getting started](#getting-started)
-- [When to use which entry point](#when-to-use-which-entry-point)
-- [Backends for `runAgentTaskStream`](#backends-for-runagenttaskstream)
-- [Lifecycle events](#lifecycle-events)
-- [Knowledge providers](#knowledge-providers)
-- [Sanitized telemetry](#sanitized-telemetry)
-- [Package boundaries](#package-boundaries)
-- [Examples](#examples)
-
-## Overview
-
-```txt
-TaskSpec
-  → Knowledge readiness
-  → Question / acquisition decision
-  → Agent control loop (observe / validate / decide / act)
-  → Eval / verification
-  → Run evidence
-```
-
-For product agents that own a streaming model backend:
-
-```txt
-TaskSpec
-  → Knowledge readiness
-  → Session create/resume
-  → Backend stream
-  → Sanitized RuntimeStreamEvent / SSE
-```
+# @tangle-network/agent-runtime
 
-## Install
+Production runtime substrate for domain agents. Owns the task lifecycle
+(knowledge readiness, control loop, session resume, sanitized telemetry,
+canonical `RuntimeRunRow` persistence + cost ledger) so domain repos stop
+inventing their own.
 
 ```bash
 pnpm add @tangle-network/agent-runtime @tangle-network/agent-eval
 ```
 
-## Getting started
+## What you get
+
+| Entry point | When to reach for it |
+|---|---|
+| `runAgentTask` | Single-shot adapter-driven task with eval/verification |
+| `runAgentTaskStream` | Streaming product loop with session resume + backends |
+| `startRuntimeRun` | Canonical production-run row + cost ledger (NEW in 0.7.0) |
+| `createTraceBridge` | Map `RuntimeStreamEvent` → `agent-eval` `TraceEvent` (NEW in 0.7.0) |
+| `decideKnowledgeReadiness` | `ready` / `blocked` / `caveat` branch for routes / UI |
+| `createOpenAICompatibleBackend` | OpenAI-compatible streaming backend (TCloud / cli-bridge) |
+| `createSandboxPromptBackend` | Sandbox / sidecar `streamPrompt` clients |
+| `createRuntimeStreamEventCollector` | Default-redacted sanitized telemetry over a stream |
+
+Every public export is annotated `@stable` or `@experimental`. `@stable`
+exports do not change shape inside a minor; `@experimental` exports may
+change inside a minor and require a deliberate consumer bump.
 
-The smallest possible task — a domain adapter responding to one task with
-no streaming:
+## Quickstart
 
 ```ts
 import { runAgentTask } from '@tangle-network/agent-runtime'
@@ -63,7 +41,7 @@ const result = await runAgentTask({
     async observe() { return { /* domain state */ } },
     async validate({ state }) { return [/* eval results */] },
     async decide({ state }) {
-      return { kind: 'finish', reason: 'review complete' }
+      return { type: 'stop', pass: true, score: 1, reason: 'review complete' }
     },
     async act() { return undefined },
   },
@@ -72,165 +50,119 @@ const result = await runAgentTask({
 console.log(result.status, result.runRecords)
 ```
 
-Full runnable: [`examples/basic-task/`](./examples/basic-task/).
-
-## When to use which entry point
-
-| You want… | Use |
-|---|---|
-| Single-shot task with eval/verification | `runAgentTask` |
-| Streaming product loop with session resume | `runAgentTaskStream` + a backend factory |
-| Just SSE serialization for an existing readiness report | `readinessServerSentEvent` |
-| Just sanitized telemetry over an existing run | `createRuntimeEventCollector` (+ `summarizeAgentTaskRun`) for `runAgentTask`, or `createRuntimeStreamEventCollector` for `runAgentTaskStream` |
-| Stable readiness branching (`ready` / `blocked` / `caveat`) in a route | `decideKnowledgeReadiness` |
-
-## Backends for `runAgentTaskStream`
+## Canonical production-run lifecycle (NEW in 0.7.0)
 
-Three SDK-agnostic factories ship in core:
-
-| Factory | When |
-|---|---|
-| `createOpenAICompatibleBackend` | TCloud / OpenAI-compatible chat APIs |
-| `createSandboxPromptBackend` | Sandbox / sidecar `streamPrompt` clients |
-| `createIterableBackend` | Custom coding harnesses, browser agents |
-
-For [cli-bridge](https://github.com/drewstone/cli-bridge) (or any other
-OpenAI-compatible HTTP gateway), use `createOpenAICompatibleBackend` pointed
-at the gateway's `/v1/chat/completions` URL — the cli-bridge harness/model
-selector is just an OpenAI `model` string like `claude/sonnet` or
-`codex/gpt-5-codex`.
-
-Adapters are intentionally thin. Product repos still own client
-construction, auth, concrete tool permissions, and UI behavior. See
-[`examples/sandbox-stream-backend/`](./examples/sandbox-stream-backend/) and
-[`examples/openai-stream-backend/`](./examples/openai-stream-backend/) for
-runnable wirings.
-
-## Lifecycle events
-
-`runAgentTask` and `runAgentTaskStream` emit typed lifecycle events
-through `onEvent`:
+`startRuntimeRun` is the ONE abstraction for "the agent did a thing on
+behalf of a customer; record what it did, what it cost, how it ended."
+It replaces bespoke `agentRuns`-row helpers (legal-agent's
+`completeProductionAgentRun` + `persistRuntimeRun` pair is the canonical
+example of what this subsumes).
 
 ```ts
-await runAgentTask({
-  task, adapter,
-  onEvent(event) {
-    console.log(event.type)
-  },
+import { startRuntimeRun, runAgentTaskStream } from '@tangle-network/agent-runtime'
+
+const run = startRuntimeRun({
+  workspaceId: 'ws-1',
+  sessionId: threadId,
+  agentId: 'legal-chat-runtime',
+  taskSpec,
+  scenarioId: `legal-chat:${threadId}`,
+  adapter: { upsert: (row) => db.insert(agentRuns).values(row) },
 })
-```
 
-Events cover readiness, question answering, acquisition, control-loop
-steps, and task completion. Every transition is observable without
-coupling domain adapters to logging, streaming, or telemetry concerns.
-
-This package does **not** stream model tokens for you. Domain adapters
-and product routes still own model calls, tool execution, and token
-streaming. agent-runtime emits lifecycle events around those actions.
+for await (const event of runAgentTaskStream({ task: taskSpec, backend, input })) {
+  run.observe(event) // llm_call events update the cost ledger
+  if (event.type === 'final') {
+    run.complete({
+      status: event.status === 'completed' ? 'completed' : 'failed',
+      resultSummary: event.text ?? '',
+      error: event.status === 'failed' ? event.reason : undefined,
+    })
+  }
+}
 
-## Knowledge providers
+await run.persist({ runtimeEvents: telemetry.events }) // telemetry = createRuntimeStreamEventCollector(), fed via telemetry.onEvent(event) in the loop
+console.log(run.cost()) // { tokensIn, tokensOut, costUsd, wallMs, llmCalls }
+```
 
-Optional. A knowledge provider implements:
+Full runnable: [`examples/runtime-run/`](./examples/runtime-run/).
 
-- `buildReadiness` — score readiness against the task's required knowledge
-- `answerQuestions` — handle outstanding user questions
-- `executeAcquisitionPlans` — fetch missing evidence
-- `refreshReadiness` — rerun scoring after acquisition
+## agent-eval trace bridge (NEW in 0.7.0)
 
-Lets a task collect missing context before the control loop starts, then
-rerun readiness against new evidence. If readiness fails, `runAgentTask`
-stops before domain actions; adapters can override `onKnowledgeBlocked`
-to emit a domain action (asking a user, querying a connector, etc.).
+If you persist traces in agent-eval's `TraceStore`, map runtime stream
+events to `TraceEvent` once and stop hand-rolling the adapter in every
+domain repo:
 
-For control policies or route handlers that need a stable readiness
-branch, use `decideKnowledgeReadiness(report)` — it returns `ready`,
-`blocked`, or `caveat` plus gap IDs and the recommended action.
+```ts
+import { createTraceBridge } from '@tangle-network/agent-runtime'
 
-## Sanitized telemetry
+const bridge = createTraceBridge({ runId, spanId })
+for await (const event of runAgentTaskStream({ task, backend, input })) {
+  const trace = bridge.toTraceEvent(event)
+  if (trace) await traceStore.appendEvent(trace)
+}
+```
 
-For logs, reports, UI telemetry — never serialize raw events directly.
-Use the built-in sanitized collector:
+## Error taxonomy
 
-```ts
-import {
-  createRuntimeEventCollector,
-  summarizeAgentTaskRun,
-} from '@tangle-network/agent-runtime'
+Errors thrown by public functions are always one of:
 
-const telemetry = createRuntimeEventCollector()
-const result = await runAgentTask({ task, adapter, onEvent: telemetry.onEvent })
+| Error | When |
+|---|---|
+| `ValidationError` | Caller passed invalid arguments |
+| `ConfigError` | Required env / config missing |
+| `NotFoundError` | A named resource does not exist |
+| `BackendTransportError` | Backend HTTP / IPC call returned non-success |
+| `SessionMismatchError` | Resume requested against a different backend |
+| `RuntimeRunStateError` | `RuntimeRunHandle` lifecycle methods called out of order |
 
-console.log(telemetry.events)
-console.log(summarizeAgentTaskRun(result))
-```
+All extend `AgentEvalError` (re-exported from `@tangle-network/agent-eval`)
+and carry a stable `code` so cross-package handlers can pattern-match
+without importing the runtime.
 
-By default, the collector redacts task inputs, user answers, credential
-questions, control payloads, evidence IDs, task metadata, and eval
-details. Private diagnostics opt-in via `RuntimeTelemetryOptions` flags
-(`includeInputs`, `includeUserAnswers`, `includeControlPayloads`,
-`includeEvidenceIds`, `includeRequirementDescriptions`,
-`includeMetadata`, `includeEvalDetails`).
+## Sanitized telemetry
 
-For `runAgentTaskStream`, use the sibling
-`createRuntimeStreamEventCollector`:
+`task.intent` flows through sanitized telemetry on every event. **Never
+set it to user input** — use a fixed string describing the operation
+kind (e.g. `"Run a chat turn"`, `"Score a tax return"`). Route
+user-visible content through `task.inputs` (redacted by default).
 
 ```ts
-import {
-  createRuntimeStreamEventCollector,
-  runAgentTaskStream,
-} from '@tangle-network/agent-runtime'
+import { createRuntimeStreamEventCollector, runAgentTaskStream } from '@tangle-network/agent-runtime'
 
 const telemetry = createRuntimeStreamEventCollector()
 for await (const event of runAgentTaskStream({ task, backend })) {
   telemetry.onEvent(event)
 }
-
-console.log(telemetry.events)
-console.log(telemetry.summary())
+console.log(telemetry.events, telemetry.summary())
 ```
 
-Same `RuntimeTelemetryOptions` flags apply. Streaming and non-streaming
-events have different field shapes (timestamps, sessions, text/tool
-deltas), which is why the factories are siblings rather than overloads —
-a single dispatcher would silently misroute events whose `type` literals
-overlap (`task_start`, `readiness_end`, etc.).
-
-### `task.intent` is sanitized telemetry by default
-
-`task.intent` flows through sanitized telemetry on every event. **Never
-set it to user input** — use a fixed string describing the operation
-kind (e.g. `"Run a chat turn"`, `"Score a tax return"`). If you need to
-log user-visible intent, route it through `inputs` (which are redacted
-by default) instead.
-
-For SSE-over-HTTP, use the helpers:
-
-```ts
-import { readinessServerSentEvent } from '@tangle-network/agent-runtime'
-writer.write(encoder.encode(readinessServerSentEvent(readinessReport)))
-```
+By default the collector redacts task inputs, user answers, credential
+questions, control payloads, evidence IDs, task metadata, and eval
+details. Private diagnostics are opt-in via `RuntimeTelemetryOptions`.
 
 ## Package boundaries
 
 | Package | Owns |
 |---|---|
-| `agent-runtime` | Reusable lifecycle and adapter contracts |
-| `agent-eval` | Control loops, readiness scoring, traces, evals, failure classes, optimization, release evidence |
+| `agent-runtime` | Lifecycle, adapters, backends, `RuntimeRunHandle`, trace bridge |
+| `agent-eval` | Control loops, readiness scoring, traces, evals, failure classes, release evidence |
 | `agent-knowledge` | Evidence, claims, wiki pages, retrieval, knowledge bundle builders |
 | Domain packages | Domain tools, policies, credentials, UI text, rubrics |
 
 The API uses `runAgentTask`, not `runVerticalAgentTask`. `domain` is
-metadata on the task, because the runtime should be reusable across many
-kinds of agents without baking taxonomy into type names.
+metadata on the task because the runtime is reusable across many kinds of
+agents without baking taxonomy into type names.
 
 ## Examples
 
 Runnable in [`examples/`](./examples/):
 
-- [`basic-task/`](./examples/basic-task/) — the smallest `runAgentTask`
-- [`with-knowledge-readiness/`](./examples/with-knowledge-readiness/) — readiness gating + custom `onKnowledgeBlocked`
-- [`sanitized-telemetry/`](./examples/sanitized-telemetry/) — `createRuntimeEventCollector` + redaction policy
-- [`sanitized-telemetry-streaming/`](./examples/sanitized-telemetry-streaming/) — `createRuntimeStreamEventCollector` + redaction policy for `runAgentTaskStream`
+- [`basic-task/`](./examples/basic-task/) — smallest `runAgentTask`
+- [`with-knowledge-readiness/`](./examples/with-knowledge-readiness/) — readiness gating + `onKnowledgeBlocked`
+- [`sanitized-telemetry/`](./examples/sanitized-telemetry/) — `createRuntimeEventCollector` + redaction
+- [`sanitized-telemetry-streaming/`](./examples/sanitized-telemetry-streaming/) — streaming collector + redaction
 - [`sse-stream/`](./examples/sse-stream/) — Server-Sent Events for browser clients
-- [`sandbox-stream-backend/`](./examples/sandbox-stream-backend/) — `runAgentTaskStream` with `createSandboxPromptBackend` (synthetic sandbox client; real one in `agent-builder`)
-- [`openai-stream-backend/`](./examples/openai-stream-backend/) — `runAgentTaskStream` with `createOpenAICompatibleBackend`
+- [`sandbox-stream-backend/`](./examples/sandbox-stream-backend/) — `createSandboxPromptBackend`
+- [`openai-stream-backend/`](./examples/openai-stream-backend/) — `createOpenAICompatibleBackend`
+- [`runtime-run/`](./examples/runtime-run/) — `startRuntimeRun` + cost ledger + persistence adapter (NEW)
diff --git a/biome.json b/biome.json
new file mode 100644
index 0000000..543a0f8
--- /dev/null
+++ b/biome.json
@@ -0,0 +1,58 @@
+{
+  "$schema": "https://biomejs.dev/schemas/2.4.15/schema.json",
+  "files": {
+    "includes": ["src/**", "tests/**", "examples/**/*.ts", "examples/**/*.tsx"],
+    "ignoreUnknown": true
+  },
+  "formatter": {
+    "enabled": true,
+    "indentStyle": "space",
+    "indentWidth": 2,
+    "lineWidth": 100,
+    "lineEnding": "lf"
+  },
+  "javascript": {
+    "formatter": {
+      "quoteStyle": "single",
+      "semicolons": "asNeeded",
+      "trailingCommas": "all",
+      "arrowParentheses": "always"
+    }
+  },
+  "linter": {
+    "enabled": true,
+    "rules": {
+      "recommended": true,
+      "suspicious": {
+        "noExplicitAny": "off",
+        "noConsole": "off",
+        "noAssignInExpressions": "warn",
+        "noImplicitAnyLet": "warn"
+      },
+      "style": {
+        "useImportType": "warn",
+        "useExportType": "warn",
+        "useNodejsImportProtocol": "error",
+        "noNonNullAssertion": "off",
+        "useTemplate": "warn",
+        "useExponentiationOperator": "warn",
+        "useShorthandFunctionType": "warn"
+      },
+      "complexity": {
+        "noUselessTypeConstraint": "warn",
+        "noBannedTypes": "warn"
+      },
+      "correctness": {
+        "noUnusedVariables": "off",
+        "noUnusedImports": "warn"
+      }
+    }
+  },
+  "assist": {
+    "actions": {
+      "source": {
+        "organizeImports": "on"
+      }
+    }
+  }
+}
diff --git a/examples/basic-task/basic-task.ts b/examples/basic-task/basic-task.ts
index 7c72c59..722efad 100644
--- a/examples/basic-task/basic-task.ts
+++ b/examples/basic-task/basic-task.ts
@@ -5,10 +5,12 @@
  *   pnpm tsx examples/basic-task/basic-task.ts
  */
 
-import { runAgentTask } from '@tangle-network/agent-runtime'
 import type { AgentAdapter } from '@tangle-network/agent-runtime'
+import { runAgentTask } from '@tangle-network/agent-runtime'
 
-interface TaxState { reviewCount: number }
+interface TaxState {
+  reviewCount: number
+}
 type TaxAction = { kind: 'review' }
 
 let reviews = 0
diff --git a/examples/openai-stream-backend/openai-stream-backend.ts b/examples/openai-stream-backend/openai-stream-backend.ts
index 669c015..25c97a5 100644
--- a/examples/openai-stream-backend/openai-stream-backend.ts
+++ b/examples/openai-stream-backend/openai-stream-backend.ts
@@ -7,8 +7,8 @@
  */
 
 import {
-  InMemoryRuntimeSessionStore,
   createOpenAICompatibleBackend,
+  InMemoryRuntimeSessionStore,
   runAgentTaskStream,
   runtimeStreamServerSentEvent,
 } from '@tangle-network/agent-runtime'
diff --git a/examples/runtime-run/README.md b/examples/runtime-run/README.md
new file mode 100644
index 0000000..a064f2a
--- /dev/null
+++ b/examples/runtime-run/README.md
@@ -0,0 +1,29 @@
+# runtime-run
+
+Canonical `RuntimeRunHandle` lifecycle: drive a streaming task through
+`runAgentTaskStream`, observe `llm_call` events into a cost ledger, and
+persist a `RuntimeRunRow` to your durable store.
+
+Use it as the replacement for bespoke `agentRuns`-row plumbing (legal-agent's
+`completeProductionAgentRun` + `persistRuntimeRun` pair is the canonical
+example of what this pattern subsumes).
+
+## Run
+
+```bash
+pnpm tsx examples/runtime-run/runtime-run.ts
+```
+
+## What it shows
+
+- `startRuntimeRun({ workspaceId, sessionId, taskSpec, adapter })` to open a run
+- `handle.observe(event)` per yielded `RuntimeStreamEvent` to keep the cost
+  ledger in sync (only `llm_call` events contribute; everything else is a
+  no-op so you can pipe the whole stream through `observe`)
+- `handle.complete({ status, resultSummary, error? })` exactly once at
+  end-of-stream (idempotent for the same status, throws for status transitions)
+- `handle.persist()` to write a `RuntimeRunRow` via your
+  `RuntimeRunPersistenceAdapter` (D1, postgres, KV — anything with an
+  `upsert(row)`)
+- `handle.cost()` returns the accumulated `{ tokensIn, tokensOut, costUsd,
+  wallMs, llmCalls }` for cost dashboards
diff --git a/examples/runtime-run/runtime-run.ts b/examples/runtime-run/runtime-run.ts
new file mode 100644
index 0000000..5c57ae0
--- /dev/null
+++ b/examples/runtime-run/runtime-run.ts
@@ -0,0 +1,141 @@
+/**
+ * Production-run lifecycle: drive a streaming task through `runAgentTaskStream`
+ * AND record a canonical `RuntimeRunRow` for cost/audit dashboards.
+ *
+ * This is the pattern that replaces legal-agent's bespoke
+ * `completeProductionAgentRun` + `persistRuntimeRun` pair. Wire it into your
+ * own DB by implementing the `RuntimeRunPersistenceAdapter` interface (one
+ * `upsert(row)` method).
+ *
+ * Run with:
+ *   pnpm tsx examples/runtime-run/runtime-run.ts
+ */
+
+import {
+  type AgentBackendInput,
+  type AgentTaskSpec,
+  createIterableBackend,
+  type RuntimeRunPersistenceAdapter,
+  type RuntimeRunRow,
+  runAgentTaskStream,
+  startRuntimeRun,
+} from '@tangle-network/agent-runtime'
+
+const readyTask: AgentTaskSpec = {
+  id: 'legal-chat:thread-42',
+  intent: 'Run a legal advisory chat turn with workspace context.',
+  domain: 'legal',
+  metadata: { workspaceId: 'ws-1', threadId: 'thread-42' },
+}
+
+// Toy backend that yields a couple of llm_call events so the cost ledger has
+// real input. Real consumers plug in `createOpenAICompatibleBackend`,
+// `createSandboxPromptBackend`, or any `AgentExecutionBackend`.
+const backend = createIterableBackend<AgentBackendInput>({
+  kind: 'demo',
+  async *stream(_input, ctx) {
+    yield {
+      type: 'llm_call',
+      task: ctx.task,
+      session: ctx.session,
+      model: 'claude-sonnet-4-6',
+      tokensIn: 1_200,
+      tokensOut: 280,
+      costUsd: 0.0042,
+      latencyMs: 510,
+      timestamp: new Date().toISOString(),
+    }
+    yield {
+      type: 'text_delta',
+      task: ctx.task,
+      session: ctx.session,
+      text: 'Reviewed the matter. No blocking issues found.',
+      timestamp: new Date().toISOString(),
+    }
+    yield {
+      type: 'llm_call',
+      task: ctx.task,
+      session: ctx.session,
+      model: 'claude-sonnet-4-6',
+      tokensIn: 600,
+      tokensOut: 110,
+      costUsd: 0.0019,
+      latencyMs: 220,
+      timestamp: new Date().toISOString(),
+    }
+  },
+})
+
+// In-memory adapter for demonstration. Real adapters write to D1 / postgres /
+// the agent's `agentRuns` table — same `upsert(row)` shape.
+const persisted: RuntimeRunRow[] = []
+const adapter: RuntimeRunPersistenceAdapter = {
+  upsert(row) {
+    persisted.push(row)
+  },
+}
+
+async function main(): Promise<void> {
+  const run = startRuntimeRun({
+    workspaceId: 'ws-1',
+    sessionId: 'thread-42',
+    agentId: 'legal-chat-runtime',
+    taskSpec: readyTask,
+    scenarioId: 'legal-chat:thread-42',
+    adapter,
+  })
+
+  // Tracks whether the run has been closed, so `complete` is never asked to
+  // move an already-completed run to a different status.
+  let completed = false
+
+  try {
+    for await (const event of runAgentTaskStream({
+      task: readyTask,
+      backend,
+      input: { message: 'Please review the latest filing.' },
+    })) {
+      run.observe(event)
+      if (event.type === 'final') {
+        const status = event.status === 'completed' ? 'completed' : 'failed'
+        run.complete({
+          status,
+          resultSummary: status === 'completed' ? 'Reviewed' : 'Stream did not complete cleanly',
+          error: status === 'failed' ? event.reason : undefined,
+        })
+        completed = true
+      }
+    }
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err)
+    if (completed) {
+      // The run is already closed; report the late error without re-completing.
+      console.error('Stream threw after final event:', message)
+    } else {
+      run.complete({
+        status: 'failed',
+        resultSummary: 'Stream threw before final event',
+        error: message,
+      })
+      completed = true
+    }
+  }
+
+  // Close the run before persisting even if the stream ended without `final`.
+  if (!completed) {
+    run.complete({
+      status: 'failed',
+      resultSummary: 'Stream ended without a final event',
+    })
+  }
+
+  await run.persist({ note: 'demo persistence metadata' })
+
+  console.log('Cost ledger:', run.cost())
+  console.log('Persisted row:', persisted[0])
+}
+
+main().catch((err) => {
+  console.error(err)
+  process.exit(1)
+})
diff --git a/examples/sandbox-stream-backend/sandbox-stream-backend.ts b/examples/sandbox-stream-backend/sandbox-stream-backend.ts
index be06c54..b830a10 100644
--- a/examples/sandbox-stream-backend/sandbox-stream-backend.ts
+++ b/examples/sandbox-stream-backend/sandbox-stream-backend.ts
@@ -9,8 +9,8 @@
  */
 
 import {
-  InMemoryRuntimeSessionStore,
   createSandboxPromptBackend,
+  InMemoryRuntimeSessionStore,
   runAgentTaskStream,
   runtimeStreamServerSentEvent,
 } from '@tangle-network/agent-runtime'
@@ -33,7 +33,7 @@ const sandboxClient = {
   get(id: string): SandboxBox {
     return {
       id,
-      async * streamPrompt(message: string) {
+      async *streamPrompt(message: string) {
         // A real sandbox forwards the prompt to a model + tools and
         // yields streamed tokens. Here we just yield three fragments.
         yield { type: 'text_delta' as const, text: `received: ${message}\n` }
diff --git a/examples/sanitized-telemetry-streaming/sanitized-telemetry-streaming.ts b/examples/sanitized-telemetry-streaming/sanitized-telemetry-streaming.ts
index ec6c209..53f5379 100644
--- a/examples/sanitized-telemetry-streaming/sanitized-telemetry-streaming.ts
+++ b/examples/sanitized-telemetry-streaming/sanitized-telemetry-streaming.ts
@@ -19,11 +19,11 @@
  */
 
 import {
+  type AgentBackendInput,
   createIterableBackend,
   createRuntimeStreamEventCollector,
-  runAgentTaskStream,
-  type AgentBackendInput,
   type RuntimeStreamEvent,
+  runAgentTaskStream,
 } from '@tangle-network/agent-runtime'
 
 // A synthetic backend that yields a small streaming script. In a real
@@ -31,7 +31,7 @@ import {
 // CLI bridge — the redaction story is identical.
 const backend = createIterableBackend<AgentBackendInput>({
   kind: 'demo-stream',
-  async * stream(_input, ctx) {
+  async *stream(_input, ctx) {
     yield {
       type: 'text_delta',
       task: ctx.task,
diff --git a/examples/sanitized-telemetry/sanitized-telemetry.ts b/examples/sanitized-telemetry/sanitized-telemetry.ts
index 2e56f6c..c1bfd57 100644
--- a/examples/sanitized-telemetry/sanitized-telemetry.ts
+++ b/examples/sanitized-telemetry/sanitized-telemetry.ts
@@ -5,18 +5,26 @@
  *   pnpm tsx examples/sanitized-telemetry/sanitized-telemetry.ts
  */
 
+import type { AgentAdapter } from '@tangle-network/agent-runtime'
 import {
   createRuntimeEventCollector,
   runAgentTask,
   summarizeAgentTaskRun,
 } from '@tangle-network/agent-runtime'
-import type { AgentAdapter } from '@tangle-network/agent-runtime'
 
 const adapter: AgentAdapter<{ ready: boolean }, void, void> = {
-  async observe() { return { ready: true } },
-  async validate() { return [{ id: 'ok', score: 1, passed: true }] },
-  async decide() { return { kind: 'finish', reason: 'demo done' } },
-  async act() { return undefined },
+  async observe() {
+    return { ready: true }
+  },
+  async validate() {
+    return [{ id: 'ok', score: 1, passed: true }]
+  },
+  async decide() {
+    return { kind: 'finish', reason: 'demo done' }
+  },
+  async act() {
+    return undefined
+  },
 }
 
 async function main() {
diff --git a/examples/sse-stream/sse-stream.ts b/examples/sse-stream/sse-stream.ts
index ff24c51..8f27fbf 100644
--- a/examples/sse-stream/sse-stream.ts
+++ b/examples/sse-stream/sse-stream.ts
@@ -5,17 +5,14 @@
  *   pnpm tsx examples/sse-stream/sse-stream.ts
  */
 
+import { type KnowledgeRequirement, scoreKnowledgeReadiness } from '@tangle-network/agent-eval'
 import {
-  InMemoryRuntimeSessionStore,
   createIterableBackend,
+  InMemoryRuntimeSessionStore,
   readinessServerSentEvent,
   runAgentTaskStream,
   runtimeStreamServerSentEvent,
 } from '@tangle-network/agent-runtime'
-import {
-  scoreKnowledgeReadiness,
-  type KnowledgeRequirement,
-} from '@tangle-network/agent-eval'
 
 // ── 1. One-off readiness SSE — the kind of event you'd write to a
 // response stream when a task is gated by missing knowledge.
@@ -49,7 +46,7 @@ process.stdout.write(readinessServerSentEvent(readinessReport))
 // they map common shapes for you.
 const backend = createIterableBackend({
   kind: 'demo-iterable',
-  async * stream(input) {
+  async *stream(input) {
     const message = input.message ?? '(no message)'
     yield { type: 'text_delta' as const, text: `you said: ${message}\n` }
     yield { type: 'text_delta' as const, text: 'thinking...\n' }
diff --git a/examples/with-knowledge-readiness/with-knowledge-readiness.ts b/examples/with-knowledge-readiness/with-knowledge-readiness.ts
index cdc1b1f..7414ccf 100644
--- a/examples/with-knowledge-readiness/with-knowledge-readiness.ts
+++ b/examples/with-knowledge-readiness/with-knowledge-readiness.ts
@@ -6,9 +6,9 @@
  *   pnpm tsx examples/with-knowledge-readiness/with-knowledge-readiness.ts
  */
 
-import { runAgentTask } from '@tangle-network/agent-runtime'
-import type { AgentAdapter } from '@tangle-network/agent-runtime'
 import type { KnowledgeRequirement } from '@tangle-network/agent-eval'
+import type { AgentAdapter } from '@tangle-network/agent-runtime'
+import { runAgentTask } from '@tangle-network/agent-runtime'
 
 function requirement(currentConfidence: number): KnowledgeRequirement {
   return {
@@ -28,10 +28,18 @@ function requirement(currentConfidence: number): KnowledgeRequirement {
 }
 
 const adapter: AgentAdapter<{ ready: boolean }, void, void> = {
-  observe() { return { ready: true } },
-  validate() { return [] },
-  decide() { return { kind: 'finish', reason: 'demo done' } },
-  act() { return undefined },
+  observe() {
+    return { ready: true }
+  },
+  validate() {
+    return []
+  },
+  decide() {
+    return { kind: 'finish', reason: 'demo done' }
+  },
+  act() {
+    return undefined
+  },
 }
 
 async function main() {
@@ -48,7 +56,10 @@ async function main() {
   console.log('blocked status:', blocked.status)
   console.log('  readinessScore:', blocked.knowledge.readinessScore)
   console.log('  recommendedAction:', blocked.knowledge.recommendedAction)
-  console.log('  blocking gaps:', blocked.knowledge.blockingMissingRequirements.map((r) => r.id))
+  console.log(
+    '  blocking gaps:',
+    blocked.knowledge.blockingMissingRequirements.map((r) => r.id),
+  )
 
   // Run 2: full confidence → readiness passes → control loop runs.
   const ready = await runAgentTask({
diff --git a/package.json b/package.json
index d7af3d4..d474e31 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-runtime",
-  "version": "0.6.0",
+  "version": "0.7.0",
   "description": "Reusable runtime lifecycle for domain-specific agents.",
   "homepage": "https://github.com/tangle-network/agent-runtime#readme",
   "repository": {
@@ -34,12 +34,15 @@
     "prepare": "tsup",
     "test": "vitest run",
     "test:watch": "vitest",
+    "lint": "biome check src tests examples",
+    "lint:fix": "biome check --write src tests examples",
     "typecheck": "tsc --noEmit"
   },
   "dependencies": {
-    "@tangle-network/agent-eval": "^0.23.0"
+    "@tangle-network/agent-eval": "^0.24.0"
   },
   "devDependencies": {
+    "@biomejs/biome": "^2.4.0",
     "@types/node": "^25.6.0",
     "tsup": "^8.0.0",
     "typescript": "^5.7.0",
@@ -49,5 +52,10 @@
     "node": ">=20"
   },
   "license": "MIT",
-  "packageManager": "pnpm@10.28.0"
+  "packageManager": "pnpm@10.28.0",
+  "pnpm": {
+    "minimumReleaseAge": 4320,
+    "minimumReleaseAgeExclude": ["@tangle-network/agent-eval"],
+    "onlyBuiltDependencies": ["esbuild"]
+  }
 }
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index e41c79d..f46b92a 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -9,9 +9,12 @@ importers:
   .:
     dependencies:
       '@tangle-network/agent-eval':
-        specifier: ^0.23.0
-        version: 0.23.0(typescript@5.9.3)
+        specifier: ^0.24.0
+        version: 0.24.0(typescript@5.9.3)
     devDependencies:
+      '@biomejs/biome':
+        specifier: ^2.4.0
+        version: 2.4.15
       '@types/node':
         specifier: ^25.6.0
         version: 25.6.0
@@ -44,6 +47,59 @@ packages:
       zod:
         optional: true
 
+  '@biomejs/biome@2.4.15':
+    resolution: {integrity: sha512-j5VH3a/h/HXTKBM50MDMxRCzkeLv9S2XJcW2WgnZT1+xyisi+0bISrXR82gCX+8S9lvK0skEvHJRN+3Ktr2hlw==}
+    engines: {node: '>=14.21.3'}
+    hasBin: true
+
+  '@biomejs/cli-darwin-arm64@2.4.15':
+    resolution: {integrity: sha512-rF3PPqLq1yoST79zaQbDjVJwsuIeci/O+9bgNmC5QpgOqz6aqYuzA4abyAGx+mgyiDXn4A049xAN8gijbuR1Qg==}
+    engines: {node: '>=14.21.3'}
+    cpu: [arm64]
+    os: [darwin]
+
+  '@biomejs/cli-darwin-x64@2.4.15':
+    resolution: {integrity: sha512-/5KHXYMfSJs1fNXiX30xFtI8JcCFV6zaVVLxOa0M2sfqBKHkpQhRTv94yxQWxeTY2lzo2OuTlNvPC+hDQt2wcQ==}
+    engines: {node: '>=14.21.3'}
+    cpu: [x64]
+    os: [darwin]
+
+  '@biomejs/cli-linux-arm64-musl@2.4.15':
+    resolution: {integrity: sha512-ZPcxznxm0pogHBLZhYntyR3sR+MrZjqJIKEr7ZqVen0Rl+P/4upVmfYXjftizi9RoqZntg33fv/1fbdhbYXpEQ==}
+    engines: {node: '>=14.21.3'}
+    cpu: [arm64]
+    os: [linux]
+
+  '@biomejs/cli-linux-arm64@2.4.15':
+    resolution: {integrity: sha512-owaAMZD/T4LrD0ELNCk0Km3qrRHuM0X6EAyVE1FSqGY0rbLoiDLrO4Us2tllm6cAeB2Ioa9C2C08NZPdr8+0Ug==}
+    engines: {node: '>=14.21.3'}
+    cpu: [arm64]
+    os: [linux]
+
+  '@biomejs/cli-linux-x64-musl@2.4.15':
+    resolution: {integrity: sha512-CNq/9W38SYSH023lfcQ4KKU8K0YX8T//FZUhcgtMMRABDojx5XsMV7jlweAvGSl389wJQB29Qo6Zb/a+jdvt+w==}
+    engines: {node: '>=14.21.3'}
+    cpu: [x64]
+    os: [linux]
+
+  '@biomejs/cli-linux-x64@2.4.15':
+    resolution: {integrity: sha512-0jj7THz12GbUOLmMibktK6DZjqz2zV64KFxyBtcFTKPiiOIY0a7vns1elpO1dERvxpsZ5ik0oFfz0oGwFde1+g==}
+    engines: {node: '>=14.21.3'}
+    cpu: [x64]
+    os: [linux]
+
+  '@biomejs/cli-win32-arm64@2.4.15':
+    resolution: {integrity: sha512-ouhkYdlhp/1GghEJPdWwD/Vi3gQ1nFxuSpMolWsbq3Lsq3QUR4jl6UdhhscdCugKU5vOEuMiJhvKj66O0OCq+w==}
+    engines: {node: '>=14.21.3'}
+    cpu: [arm64]
+    os: [win32]
+
+  '@biomejs/cli-win32-x64@2.4.15':
+    resolution: {integrity: sha512-zBrGq5mx5wwpnow4+2BxUvleDM+GNd4sLbPaMapsSLQLD0NGRCquqPBTgN+7XkUteHvj7M+BstuI8tmnV7+HgQ==}
+    engines: {node: '>=14.21.3'}
+    cpu: [x64]
+    os: [win32]
+
   '@esbuild/aix-ppc64@0.27.7':
     resolution: {integrity: sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==}
     engines: {node: '>=18'}
@@ -386,8 +442,8 @@ packages:
   '@scure/bip39@2.2.0':
     resolution: {integrity: sha512-T/Bj/YvYMNkIPq6EENO6/rcs2e7qTNuyoUXf0KBFDmp0ZDu0H2X4Lq6yC3i0c8PcWkov5EbW+yQZZbdMmk154A==}
 
-  '@tangle-network/agent-eval@0.23.0':
-    resolution: {integrity: sha512-YY4J2v1epvTBJ3HeNAYs4AaeurgUZCTfmooGrmDbKeAfWSD6Xzv8RC33xChd1Tge/IGDz1ILRTfpLqyuhNU2aQ==}
+  '@tangle-network/agent-eval@0.24.0':
+    resolution: {integrity: sha512-Wwr0qIwI/m/HsNTotyZnfM+aLbcbgpAyGhqjgyO3YqJQaZckJx/DUH0AqppVIfOAoR2vZbdvTRaQNeltDidSrA==}
     engines: {node: '>=20'}
     hasBin: true
 
@@ -891,6 +947,41 @@ snapshots:
     optionalDependencies:
       zod: 4.4.2
 
+  '@biomejs/biome@2.4.15':
+    optionalDependencies:
+      '@biomejs/cli-darwin-arm64': 2.4.15
+      '@biomejs/cli-darwin-x64': 2.4.15
+      '@biomejs/cli-linux-arm64': 2.4.15
+      '@biomejs/cli-linux-arm64-musl': 2.4.15
+      '@biomejs/cli-linux-x64': 2.4.15
+      '@biomejs/cli-linux-x64-musl': 2.4.15
+      '@biomejs/cli-win32-arm64': 2.4.15
+      '@biomejs/cli-win32-x64': 2.4.15
+
+  '@biomejs/cli-darwin-arm64@2.4.15':
+    optional: true
+
+  '@biomejs/cli-darwin-x64@2.4.15':
+    optional: true
+
+  '@biomejs/cli-linux-arm64-musl@2.4.15':
+    optional: true
+
+  '@biomejs/cli-linux-arm64@2.4.15':
+    optional: true
+
+  '@biomejs/cli-linux-x64-musl@2.4.15':
+    optional: true
+
+  '@biomejs/cli-linux-x64@2.4.15':
+    optional: true
+
+  '@biomejs/cli-win32-arm64@2.4.15':
+    optional: true
+
+  '@biomejs/cli-win32-x64@2.4.15':
+    optional: true
+
   '@esbuild/aix-ppc64@0.27.7':
     optional: true
 
@@ -1104,7 +1195,7 @@ snapshots:
       '@noble/hashes': 2.2.0
       '@scure/base': 2.2.0
 
-  '@tangle-network/agent-eval@0.23.0(typescript@5.9.3)':
+  '@tangle-network/agent-eval@0.24.0(typescript@5.9.3)':
     dependencies:
       '@asteasolutions/zod-to-openapi': 8.5.0(zod@4.4.2)
       '@ax-llm/ax': 19.0.45(zod@4.4.2)
diff --git a/src/backends.ts b/src/backends.ts
new file mode 100644
index 0000000..c1a0d0d
--- /dev/null
+++ b/src/backends.ts
@@ -0,0 +1,310 @@
+/**
+ * @stable
+ *
+ * Backend factories for `runAgentTaskStream`. Three shapes ship in core:
+ *
+ *  - `createIterableBackend` — wrap any custom async iterable into a backend
+ *  - `createSandboxPromptBackend` — sandbox / sidecar `streamPrompt` clients
+ *  - `createOpenAICompatibleBackend` — OpenAI-style chat completions endpoints
+ *
+ * Adapters stay thin: domain repos own auth, model selection, and the concrete
+ * tool surface. The factories handle session creation, stream normalization,
+ * and graceful end-of-stream signalling.
+ */
+
+import { BackendTransportError } from './errors'
+import { newRuntimeSession, nowIso, touchSession } from './sessions'
+import type {
+  AgentBackendContext,
+  AgentBackendInput,
+  AgentExecutionBackend,
+  RuntimeSession,
+  RuntimeStreamEvent,
+} from './types'
+
+/** Returns `options` unchanged, typed as a backend, so a custom stream plugs in directly. @stable */
+export function createIterableBackend<TInput extends AgentBackendInput>(options: {
+  kind: string
+  start?: AgentExecutionBackend<TInput>['start']
+  resume?: AgentExecutionBackend<TInput>['resume']
+  stream: AgentExecutionBackend<TInput>['stream']
+  stop?: AgentExecutionBackend<TInput>['stop']
+}): AgentExecutionBackend<TInput> {
+  return options
+}
+
+/** Backend over a sandbox/sidecar `streamPrompt` client; `kind` defaults to 'sandbox'. @stable */
+export function createSandboxPromptBackend<
+  TBox,
+  TInput extends AgentBackendInput = AgentBackendInput,
+>(options: {
+  kind?: string
+  getBox(input: TInput, context: Omit<AgentBackendContext, 'session'>): Promise<TBox> | TBox
+  streamPrompt(box: TBox, message: string, context: AgentBackendContext): AsyncIterable<unknown>
+  mapEvent?: (event: unknown, context: AgentBackendContext) => RuntimeStreamEvent | undefined
+  getSessionId?: (box: TBox, input: TInput) => string | undefined
+}): AgentExecutionBackend<TInput> {
+  const backendKind = options.kind ?? 'sandbox'
+  return {
+    kind: backendKind,
+    async start(input, context) {
+      const sandbox = await options.getBox(input, context)
+      // A box-derived id wins; otherwise fall back to the caller-requested id.
+      const sessionId = options.getSessionId?.(sandbox, input) ?? context.requestedSessionId
+      return newRuntimeSession(backendKind, sessionId, { resumable: true })
+    },
+    resume(session) {
+      const reactivated = { ...session, status: 'active' as const }
+      return touchSession(reactivated)
+    },
+    async *stream(input, context) {
+      const sandbox = await options.getBox(input, context)
+      // Prompt precedence: explicit message, then the last chat message, then the task intent.
+      const prompt = input.message ?? input.messages?.at(-1)?.content ?? context.task.intent
+      for await (const raw of options.streamPrompt(sandbox, prompt, context)) {
+        const normalized = options.mapEvent?.(raw, context) ?? mapCommonBackendEvent(raw, context)
+        if (normalized) yield normalized
+      }
+    },
+  }
+}
+
+/** Streaming backend for OpenAI-style `/chat/completions`; `kind` defaults to 'tcloud'. @stable */
+export function createOpenAICompatibleBackend<
+  TInput extends AgentBackendInput = AgentBackendInput,
+>(options: {
+  apiKey: string
+  baseUrl: string
+  model: string
+  kind?: string
+  fetchImpl?: typeof fetch
+}): AgentExecutionBackend<TInput> {
+  const doFetch = options.fetchImpl ?? fetch
+  const backendKind = options.kind ?? 'tcloud'
+  return {
+    kind: backendKind,
+    start(_input, context) {
+      return newRuntimeSession(backendKind, context.requestedSessionId)
+    },
+    async *stream(input, context) {
+      // Drop a single trailing slash so the joined URL has no `//` before the path.
+      const endpoint = `${options.baseUrl.replace(/\/$/, '')}/chat/completions`
+      // Caller-supplied messages win; otherwise send one user turn built from message/intent.
+      const chatMessages = input.messages ?? [
+        { role: 'user', content: input.message ?? context.task.intent },
+      ]
+      const payload = JSON.stringify({ model: options.model, stream: true, messages: chatMessages })
+      // The caller's signal is handed to the fetch implementation along with the request.
+      const response = await doFetch(endpoint, {
+        method: 'POST',
+        headers: { Authorization: `Bearer ${options.apiKey}`, 'Content-Type': 'application/json' },
+        body: payload,
+        signal: context.signal,
+      })
+      if (response.ok) {
+        yield* streamResponseEvents(response, context)
+        return
+      }
+      // Non-OK responses surface as a typed transport error that carries the status code.
+      const { status } = response
+      throw new BackendTransportError(backendKind, `chat backend returned ${status}`, { status })
+    },
+  }
+}
+
+/**
+ * @internal
+ *
+ * Backfills `task`, `session` and `timestamp` on a backend event that lacks any of them.
+ */
+export function normalizeBackendStreamEvent(
+  event: RuntimeStreamEvent,
+  task: AgentBackendContext['task'],
+  session: RuntimeSession,
+): RuntimeStreamEvent {
+  // A field counts as present only when the key exists and its value is truthy.
+  const ownTask = 'task' in event && event.task ? event.task : undefined
+  const ownSession = 'session' in event && event.session ? event.session : undefined
+  const ownTimestamp = 'timestamp' in event && event.timestamp ? event.timestamp : undefined
+  // Fully stamped events are handed back as the same object, not a copy.
+  if (ownTask && ownSession && ownTimestamp) return event
+  return {
+    ...event,
+    task: ownTask ?? task,
+    session: ownSession ?? session,
+    timestamp: ownTimestamp ?? nowIso(),
+  } as RuntimeStreamEvent
+}
+
+/**
+ * Best-effort mapping from a loosely-typed backend event to a `RuntimeStreamEvent`.
+ *
+ * Recognised `type` values:
+ *  - `message.part.updated`, `text_delta`, `delta` → `text_delta`
+ *  - `reasoning_delta` → `reasoning_delta`
+ *  - `tool_call`, `tool_result` → same type; `toolName` falls back to `'tool'`
+ *  - `result`, `final` → `text_delta` carrying the final text
+ *
+ * Non-objects, unrecognised types, and text events without a non-empty string
+ * map to `undefined`.
+ */
+function mapCommonBackendEvent(
+  event: unknown,
+  context: AgentBackendContext,
+): RuntimeStreamEvent | undefined {
+  if (!event || typeof event !== 'object') return undefined
+  const record = event as Record<string, unknown>
+  const type = String(record.type ?? '')
+  // Payload fields may sit under an object-valued `data` or directly on the event.
+  const nested = record.data && typeof record.data === 'object'
+  const data = nested ? (record.data as Record<string, unknown>) : record
+  const { task, session } = context
+  // Text-bearing events share one shape; no text means there is nothing to emit.
+  const textEvent = (
+    kind: 'text_delta' | 'reasoning_delta',
+    text: string | undefined,
+  ): RuntimeStreamEvent | undefined =>
+    text ? { type: kind, task, session, text, timestamp: nowIso() } : undefined
+  switch (type) {
+    case 'message.part.updated':
+    case 'text_delta':
+    case 'delta':
+      return textEvent(
+        'text_delta',
+        stringValue(data.text) ?? stringValue(data.delta) ?? stringValue(record.text),
+      )
+    case 'reasoning_delta':
+      return textEvent('reasoning_delta', stringValue(data.text) ?? stringValue(record.text))
+    case 'tool_call':
+      // Tool events accept either the nested payload names or the top-level field names.
+      return {
+        type: 'tool_call',
+        task,
+        session,
+        toolName: stringValue(data.name) ?? stringValue(record.toolName) ?? 'tool',
+        toolCallId: stringValue(data.id) ?? stringValue(record.toolCallId),
+        args: data.args ?? data.input ?? record.args,
+        timestamp: nowIso(),
+      }
+    case 'tool_result':
+      return {
+        type: 'tool_result',
+        task,
+        session,
+        toolName: stringValue(data.name) ?? stringValue(record.toolName) ?? 'tool',
+        toolCallId: stringValue(data.id) ?? stringValue(record.toolCallId),
+        result: data.result ?? data.output ?? record.result,
+        timestamp: nowIso(),
+      }
+    case 'result':
+    case 'final':
+      return textEvent(
+        'text_delta',
+        stringValue(data.finalText) ?? stringValue(data.text) ?? stringValue(record.text),
+      )
+    default:
+      return undefined
+  }
+}
+
+/** Decodes a streamed body into events: SSE `data:` frames or newline-delimited records. */
+async function* streamResponseEvents(
+  response: Response,
+  context: AgentBackendContext,
+): AsyncIterable<RuntimeStreamEvent> {
+  const body = response.body
+  if (!body) return
+  const reader = body.getReader()
+  const decoder = new TextDecoder()
+  let buffer = ''
+  for (;;) {
+    const { done, value } = await reader.read()
+    if (done) break
+    // Normalise the joined buffer, not just the new text: a CRLF split across two reads
+    // would otherwise keep its `\r` and hide the blank-line boundary between SSE frames.
+    buffer = (buffer + decoder.decode(value, { stream: true })).replace(/\r\n/g, '\n')
+    for (const event of drainStreamBuffer(false)) yield event
+  }
+  buffer = (buffer + decoder.decode()).replace(/\r\n/g, '\n')
+  for (const event of drainStreamBuffer(true)) yield event
+  if (buffer.trim()) {
+    const event = parseStreamChunk(buffer, context)
+    if (event) yield event
+  }
+  function* drainStreamBuffer(flush: boolean): Iterable<RuntimeStreamEvent> {
+    for (;;) {
+      const sseBoundary = buffer.indexOf('\n\n')
+      if (sseBoundary >= 0) {
+        const chunk = buffer.slice(0, sseBoundary)
+        buffer = buffer.slice(sseBoundary + 2)
+        const event = parseStreamChunk(chunk, context)
+        if (event) yield event
+        continue
+      }
+      // A lone newline ends a record only when the line is not an SSE `data:` field.
+      const newline = buffer.indexOf('\n')
+      if (newline >= 0 && !buffer.slice(0, newline).startsWith('data:')) {
+        const line = buffer.slice(0, newline)
+        buffer = buffer.slice(newline + 1)
+        const event = parseStreamChunk(line, context)
+        if (event) yield event
+        continue
+      }
+      if (flush && buffer.trim() && !buffer.trimStart().startsWith('data:')) {
+        const line = buffer
+        buffer = ''
+        const event = parseStreamChunk(line, context)
+        if (event) yield event
+        continue
+      }
+      break
+    }
+  }
+}
+
+/**
+ * Parses one buffered chunk — an SSE frame or a bare line — into a runtime event.
+ *
+ * `data:` field values are joined with `\n`; without any, the trimmed chunk is the payload.
+ * JSON payloads are read as chat-completion chunks (`choices[0].delta.content`, then
+ * `choices[0].message.content`, then top-level `text`) before trying the common event shapes.
+ * If JSON handling throws, the payload is passed through verbatim as a `text_delta`.
+ * Empty payloads, the `[DONE]` sentinel and comment-only SSE frames yield `undefined`.
+ */
+function parseStreamChunk(
+  chunk: string,
+  context: AgentBackendContext,
+): RuntimeStreamEvent | undefined {
+  const { task, session } = context
+  const lines = chunk.split(/\r?\n/)
+  const dataLines = lines.filter((line) => line.startsWith('data:'))
+  // SSE comment lines (leading `:`) are keep-alives; a frame holding nothing else is not
+  // content and must not fall through to the raw-text path below.
+  const commentOnly = lines.every((line) => !line.trim() || line.startsWith(':'))
+  if (dataLines.length === 0 && commentOnly) return undefined
+  const data =
+    dataLines.length > 0
+      ? dataLines.map((line) => line.slice(5).trimStart()).join('\n')
+      : chunk.trim()
+  if (!data || data === '[DONE]') return undefined
+  try {
+    const parsed = JSON.parse(data) as Record<string, unknown>
+    const choices = parsed.choices
+    const choice = Array.isArray(choices)
+      ? (choices[0] as Record<string, unknown> | undefined)
+      : undefined
+    const delta = choice?.delta as Record<string, unknown> | undefined
+    const message = choice?.message as Record<string, unknown> | undefined
+    const text =
+      stringValue(delta?.content) ?? stringValue(message?.content) ?? stringValue(parsed.text)
+    if (text) return { type: 'text_delta', task, session, text, timestamp: nowIso() }
+    return mapCommonBackendEvent(parsed, context)
+  } catch {
+    return { type: 'text_delta', task, session, text: data, timestamp: nowIso() }
+  }
+}
+
+function stringValue(value: unknown): string | undefined {
+  return typeof value !== 'string' || value === '' ? undefined : value // non-empty strings only
+}
diff --git a/src/errors.ts b/src/errors.ts
new file mode 100644
index 0000000..3071021
--- /dev/null
+++ b/src/errors.ts
@@ -0,0 +1,89 @@
+/**
+ * @stable
+ *
+ * Error taxonomy for `@tangle-network/agent-runtime`.
+ *
+ * Public contract: every error this package throws as part of its consumer-
+ * facing API either extends `AgentEvalError` (re-exported here for ergonomic
+ * `instanceof` checks at the runtime boundary) or extends one of the
+ * runtime-specific subclasses below.
+ *
+ * Internal invariant guards (`throw new Error('this should never happen')`)
+ * remain plain `Error` — they are programmer-mistake assertions, not
+ * consumer-catchable contract failures.
+ *
+ * Subclassing strategy: where a runtime-specific failure maps cleanly to an
+ * agent-eval code (validation, config, not_found), we re-use the agent-eval
+ * subclass. Runtime-only failure modes (session resume against the wrong
+ * backend, backend transport errors) get fresh subclasses that still carry an
+ * `AgentEvalErrorCode` so cross-package handlers can pattern-match without
+ * importing the runtime.
+ */
+
+import { AgentEvalError } from '@tangle-network/agent-eval'
+
+export {
+  AgentEvalError,
+  type AgentEvalErrorCode,
+  CaptureIntegrityError,
+  ConfigError,
+  JudgeError,
+  NotFoundError,
+  ReplayError,
+  ValidationError,
+  VerificationError,
+} from '@tangle-network/agent-eval'
+
+/**
+ * @stable
+ *
+ * Caller asked to resume a session against a backend whose `kind` does not
+ * match the session's recorded backend. This is a routing bug — the same
+ * session id was reused across two different backend implementations — and
+ * is not retryable without picking the right backend.
+ */
+export class SessionMismatchError extends AgentEvalError {
+  readonly sessionBackend: string // backend kind recorded on the session being resumed
+  readonly requestedBackend: string // kind of the backend the caller tried to resume with
+
+  constructor(sessionBackend: string, requestedBackend: string, options?: { cause?: unknown }) {
+    super(
+      'validation', // reported under the shared `validation` error code
+      `Cannot resume ${sessionBackend} session with ${requestedBackend} backend`,
+      options,
+    )
+    this.sessionBackend = sessionBackend
+    this.requestedBackend = requestedBackend
+  }
+}
+
+/**
+ * @stable
+ *
+ * A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success
+ * status. Distinct from `JudgeError` (which is structural / unrecoverable)
+ * because backend failures are sometimes retryable and consumers may want to
+ * branch on the upstream status code.
+ */
+export class BackendTransportError extends AgentEvalError {
+  readonly backend: string // identifier of the backend whose transport call failed
+  readonly status?: number // upstream status code, when the caller supplied one
+
+  constructor(backend: string, message: string, options?: { cause?: unknown; status?: number }) {
+    super('config', message, options) // NOTE(review): transport failures reuse the `config` code — confirm intended
+    this.backend = backend
+    this.status = options?.status
+  }
+}
+
+/**
+ * @stable
+ *
+ * A runtime-run lifecycle method was called in an order the state machine does
+ * not allow: `persist()` before `complete()`, `complete()` twice, etc.
+ */
+export class RuntimeRunStateError extends AgentEvalError {
+  constructor(message: string, options?: { cause?: unknown }) {
+    super('validation', message, options) // lifecycle-order violations use the `validation` code
+  }
+}
diff --git a/src/index.ts b/src/index.ts
index ca4ae87..65597eb 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,1379 +1,17 @@
-import {
-  acquisitionPlansForKnowledgeGaps,
-  blockingKnowledgeEval,
-  runAgentControlLoop,
-  scoreKnowledgeReadiness,
-  userQuestionsForKnowledgeGaps,
-  type ControlBudget,
-  type ControlContext,
-  type ControlDecision,
-  type ControlEvalResult,
-  type ControlRunResult,
-  type ControlStep,
-  type DataAcquisitionPlan,
-  type KnowledgeReadinessReport,
-  type KnowledgeRequirement,
-  type RunRecord,
-  type TraceStore,
-  type UserQuestion,
-} from '@tangle-network/agent-eval'
-
-export interface AgentTaskSpec {
-  id: string
-  intent: string
-  /** Domain is metadata, not an architectural boundary: tax, legal, gtm, creative, blueprint, redteam, etc. */
-  domain?: string
-  inputs?: Record<string, unknown>
-  requiredKnowledge?: KnowledgeRequirement[]
-  budget?: Partial<ControlBudget>
-  metadata?: Record<string, unknown>
-}
-
-export interface AgentKnowledgeProvider {
-  buildReadiness?(task: AgentTaskSpec): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport
-  answerQuestions?(questions: UserQuestion[], task: AgentTaskSpec): Promise<Record<string, string>> | Record<string, string>
-  executeAcquisitionPlans?(plans: DataAcquisitionPlan[], task: AgentTaskSpec): Promise<string[]> | string[]
-  refreshReadiness?(input: {
-    task: AgentTaskSpec
-    previous: KnowledgeReadinessReport
-    userAnswers: Record<string, string>
-    acquiredEvidenceIds: string[]
-  }): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport
-}
-
-export interface AgentTaskContext<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
-  task: AgentTaskSpec
-  knowledge: KnowledgeReadinessReport
-  state: TState
-  evals: TEval[]
-  history: ControlStep<TState, TAction, TActionResult, TEval>[]
-  budget: ControlBudget
-  stepIndex: number
-  wallMs: number
-  spentCostUsd: number
-  remainingCostUsd?: number
-  abortSignal: AbortSignal
-}
-
-export interface AgentAdapter<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
-  observe(ctx: {
-    task: AgentTaskSpec
-    knowledge: KnowledgeReadinessReport
-    history: ControlStep<TState, TAction, TActionResult, TEval>[]
-    abortSignal: AbortSignal
-  }): Promise<TState> | TState
-
-  validate(ctx: {
-    task: AgentTaskSpec
-    knowledge: KnowledgeReadinessReport
-    state: TState
-    history: ControlStep<TState, TAction, TActionResult, TEval>[]
-    abortSignal: AbortSignal
-  }): Promise<TEval[]> | TEval[]
-
-  decide(ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>): Promise<ControlDecision<TAction>> | ControlDecision<TAction>
-
-  act(action: TAction, ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>): Promise<TActionResult> | TActionResult
-
-  shouldStop?(ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>): Promise<{
-    stop: boolean
-    pass: boolean
-    reason: string
-    score?: number
-  }> | {
-    stop: boolean
-    pass: boolean
-    reason: string
-    score?: number
-  }
-
-  onKnowledgeBlocked?(ctx: {
-    task: AgentTaskSpec
-    knowledge: KnowledgeReadinessReport
-    questions: UserQuestion[]
-    acquisitionPlans: DataAcquisitionPlan[]
-  }): Promise<ControlDecision<TAction>> | ControlDecision<TAction>
-
-  getActionCostUsd?(ctx: {
-    action: TAction
-    result: TActionResult
-    task: AgentTaskSpec
-    state: TState
-    evals: TEval[]
-    history: ControlStep<TState, TAction, TActionResult, TEval>[]
-  }): number | undefined
-
-  projectRunRecords?(result: ControlRunResult<TState, TAction, TActionResult, TEval>, task: AgentTaskSpec): RunRecord[]
-}
-
-export type AgentTaskStatus =
-  | 'completed'
-  | 'blocked'
-  | 'failed'
-  | 'aborted'
-
-export type AgentRuntimeEvent<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> =
-  | { type: 'task_start'; task: AgentTaskSpec }
-  | { type: 'readiness_start'; task: AgentTaskSpec }
-  | { type: 'readiness_end'; task: AgentTaskSpec; knowledge: KnowledgeReadinessReport }
-  | { type: 'questions_start'; task: AgentTaskSpec; questions: UserQuestion[] }
-  | { type: 'questions_end'; task: AgentTaskSpec; questions: UserQuestion[]; userAnswers: Record<string, string> }
-  | { type: 'acquisition_start'; task: AgentTaskSpec; acquisitionPlans: DataAcquisitionPlan[] }
-  | { type: 'acquisition_end'; task: AgentTaskSpec; acquisitionPlans: DataAcquisitionPlan[]; acquiredEvidenceIds: string[] }
-  | { type: 'control_start'; task: AgentTaskSpec; knowledge: KnowledgeReadinessReport }
-  | { type: 'control_step'; task: AgentTaskSpec; step: ControlStep<TState, TAction, TActionResult, TEval> }
-  | { type: 'control_end'; task: AgentTaskSpec; control: ControlRunResult<TState, TAction, TActionResult, TEval> }
-  | { type: 'task_end'; task: AgentTaskSpec; status: AgentTaskStatus; reason: string }
-
-export type AgentRuntimeEventSink<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> = (
-  event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>,
-) => Promise<void> | void
-
-export type RuntimeStreamEvent =
-  | { type: 'task_start'; task: AgentTaskSpec; timestamp: string }
-  | { type: 'readiness_start'; task: AgentTaskSpec; timestamp: string }
-  | { type: 'readiness_end'; task: AgentTaskSpec; knowledge: KnowledgeReadinessReport; decision: KnowledgeReadinessDecision; timestamp: string }
-  | { type: 'questions_start'; task: AgentTaskSpec; questions: UserQuestion[]; timestamp: string }
-  | { type: 'questions_end'; task: AgentTaskSpec; questions: UserQuestion[]; userAnswers: Record<string, string>; timestamp: string }
-  | { type: 'acquisition_start'; task: AgentTaskSpec; acquisitionPlans: DataAcquisitionPlan[]; timestamp: string }
-  | { type: 'acquisition_end'; task: AgentTaskSpec; acquisitionPlans: DataAcquisitionPlan[]; acquiredEvidenceIds: string[]; timestamp: string }
-  | { type: 'session_created'; task: AgentTaskSpec; session: RuntimeSession; timestamp: string }
-  | { type: 'session_resumed'; task: AgentTaskSpec; session: RuntimeSession; timestamp: string }
-  | { type: 'backend_start'; task: AgentTaskSpec; session: RuntimeSession; backend: string; timestamp: string }
-  | { type: 'text_delta'; task?: AgentTaskSpec; session?: RuntimeSession; text: string; timestamp?: string }
-  | { type: 'reasoning_delta'; task?: AgentTaskSpec; session?: RuntimeSession; text: string; timestamp?: string }
-  | { type: 'tool_call'; task?: AgentTaskSpec; session?: RuntimeSession; toolName: string; toolCallId?: string; args?: unknown; timestamp?: string }
-  | { type: 'tool_result'; task?: AgentTaskSpec; session?: RuntimeSession; toolName: string; toolCallId?: string; result?: unknown; timestamp?: string }
-  | { type: 'artifact'; task?: AgentTaskSpec; session?: RuntimeSession; artifactId: string; name?: string; mimeType?: string; uri?: string; metadata?: Record<string, unknown>; timestamp?: string }
-  | { type: 'backend_error'; task: AgentTaskSpec; session?: RuntimeSession; backend: string; message: string; recoverable: boolean; timestamp: string }
-  | { type: 'backend_end'; task: AgentTaskSpec; session: RuntimeSession; backend: string; timestamp: string }
-  | { type: 'task_end'; task: AgentTaskSpec; status: AgentTaskStatus; reason: string; timestamp: string }
-  | { type: 'final'; task: AgentTaskSpec; session?: RuntimeSession; status: AgentTaskStatus; reason: string; text?: string; metadata?: Record<string, unknown>; timestamp: string }
-
-export interface RuntimeSession {
-  id: string
-  backend: string
-  status: 'active' | 'completed' | 'failed' | 'aborted'
-  resumeToken?: string
-  createdAt: string
-  updatedAt: string
-  metadata?: Record<string, unknown>
-}
-
-export interface RuntimeSessionStore {
-  get(sessionId: string): Promise<RuntimeSession | undefined> | RuntimeSession | undefined
-  put(session: RuntimeSession): Promise<void> | void
-  appendEvent?(sessionId: string, event: RuntimeStreamEvent): Promise<void> | void
-  listEvents?(sessionId: string): Promise<RuntimeStreamEvent[]> | RuntimeStreamEvent[]
-}
-
-export interface AgentBackendInput {
-  task: AgentTaskSpec
-  message?: string
-  messages?: Array<{ role: string; content: string }>
-  inputs?: Record<string, unknown>
-}
-
-export interface AgentBackendContext {
-  task: AgentTaskSpec
-  knowledge: KnowledgeReadinessReport
-  session: RuntimeSession
-  signal?: AbortSignal
-}
-
-export interface AgentExecutionBackend<TInput extends AgentBackendInput = AgentBackendInput> {
-  kind: string
-  start?(input: TInput, context: Omit<AgentBackendContext, 'session'> & { requestedSessionId?: string }): Promise<RuntimeSession> | RuntimeSession
-  resume?(session: RuntimeSession, input: TInput, context: Omit<AgentBackendContext, 'session'>): Promise<RuntimeSession> | RuntimeSession
-  stream(input: TInput, context: AgentBackendContext): AsyncIterable<RuntimeStreamEvent>
-  stop?(session: RuntimeSession, reason: string): Promise<void> | void
-}
-
-export interface RunAgentTaskStreamOptions<TInput extends AgentBackendInput = AgentBackendInput> {
-  task: AgentTaskSpec
-  backend: AgentExecutionBackend<TInput>
-  input?: Omit<TInput, 'task'>
-  knowledge?: AgentKnowledgeProvider
-  sessionStore?: RuntimeSessionStore
-  sessionId?: string
-  resume?: boolean
-  signal?: AbortSignal
-  minimumReadinessScore?: number
-}
-
-export interface RunAgentTaskOptions<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
-  task: AgentTaskSpec
-  adapter: AgentAdapter<TState, TAction, TActionResult, TEval>
-  knowledge?: AgentKnowledgeProvider
-  onEvent?: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval>
-  store?: TraceStore
-  signal?: AbortSignal
-  scenarioId?: string
-  projectId?: string
-  variantId?: string
-  minimumReadinessScore?: number
-}
-
-export interface AgentTaskRunResult<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
-  task: AgentTaskSpec
-  status: AgentTaskStatus
-  knowledge: KnowledgeReadinessReport
-  questions: UserQuestion[]
-  acquisitionPlans: DataAcquisitionPlan[]
-  userAnswers: Record<string, string>
-  acquiredEvidenceIds: string[]
-  control: ControlRunResult<TState, TAction, TActionResult, TEval>
-  runRecords: RunRecord[]
-}
-
-export interface RuntimeTelemetryOptions {
-  /**
-   * Include raw task inputs. Off by default because task inputs often
-   * contain customer facts, credentials, source text, or internal IDs.
-   */
-  includeInputs?: boolean
-  /** Include requirement descriptions. Secret requirements are always redacted. */
-  includeRequirementDescriptions?: boolean
-  /** Include evidence IDs. Off by default; counts are safer for shared reports. */
-  includeEvidenceIds?: boolean
-  /** Include user answers from question preflight. Off by default. */
-  includeUserAnswers?: boolean
-  /** Include action payloads and action results for control steps. Off by default. */
-  includeControlPayloads?: boolean
-  /** Include task metadata. Off by default because metadata may carry IDs or policy internals. */
-  includeMetadata?: boolean
-  /** Include eval detail/evidence strings. Off by default because validators may echo private input. */
-  includeEvalDetails?: boolean
-}
-
-export interface SanitizedKnowledgeRequirement {
-  id: string
-  description?: string
-  requiredFor: string[]
-  category: KnowledgeRequirement['category']
-  acquisitionMode: KnowledgeRequirement['acquisitionMode']
-  importance: KnowledgeRequirement['importance']
-  freshness: KnowledgeRequirement['freshness']
-  sensitivity: KnowledgeRequirement['sensitivity']
-  confidenceNeeded: number
-  currentConfidence: number
-  evidenceCount: number
-  evidenceIds?: string[]
-  fallbackPolicy: KnowledgeRequirement['fallbackPolicy']
-}
-
-export interface SanitizedKnowledgeReadinessReport {
-  taskId: string
-  readinessScore: number
-  recommendedAction: KnowledgeReadinessReport['recommendedAction']
-  severity: KnowledgeReadinessReport['severity']
-  reason: string
-  blockingMissingRequirements: SanitizedKnowledgeRequirement[]
-  nonBlockingGaps: SanitizedKnowledgeRequirement[]
-  evidenceCount: number
-  evidenceIds?: string[]
-  missingRequirementIds: string[]
-}
-
-export interface AgentTaskRunSummary {
-  taskId: string
-  domain?: string
-  status: AgentTaskStatus
-  reason: string
-  readinessStatus: KnowledgeReadinessDecision['status']
-  readinessScore: number
-  recommendedAction: KnowledgeReadinessReport['recommendedAction']
-  blockingGapIds: string[]
-  nonBlockingGapIds: string[]
-  questionCount: number
-  acquisitionPlanCount: number
-  acquiredEvidenceCount: number
-  controlStepCount: number
-  pass: boolean
-  failureClass?: string
-  wallMs: number
-  costUsd: number
-}
-
-export interface KnowledgeReadinessDecision {
-  passed: boolean
-  status: 'ready' | 'blocked' | 'caveat'
-  reason: string
-  readinessScore: number
-  recommendedAction: KnowledgeReadinessReport['recommendedAction']
-  severity: KnowledgeReadinessReport['severity']
-  blockingGapIds: string[]
-  nonBlockingGapIds: string[]
-}
-
-export interface RuntimeEventCollector<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> {
-  onEvent: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval>
-  events: Array<Record<string, unknown>>
-}
-
-export type RuntimeStreamEventSink = (event: RuntimeStreamEvent) => void
-
-export interface RuntimeStreamEventSummary {
-  /** Total count of sanitized events collected. */
-  eventCount: number
-  /** Count of events per `type`. Useful for log-line summaries. */
-  eventCountsByType: Record<string, number>
-  /** First session id observed in a `session_created` / `session_resumed` event, if any. */
-  firstSessionId?: string
-  /** Last `final` event's status, if a final event was observed. */
-  finalStatus?: AgentTaskStatus
-  /** Last `final` event's reason, if a final event was observed. */
-  finalReason?: string
-  /** Concatenated `text_delta.text` across the stream, even when payloads are redacted. */
-  finalText: string
-}
-
-export interface RuntimeStreamEventCollector {
-  onEvent: RuntimeStreamEventSink
-  events: Array<Record<string, unknown>>
-  /** Snapshot of a small streaming-flavored summary derived from collected events. */
-  summary(): RuntimeStreamEventSummary
-}
-
-export interface ServerSentEventOptions {
-  event?: string
-  id?: string
-  retry?: number
-}
-
-export class InMemoryRuntimeSessionStore implements RuntimeSessionStore {
-  private readonly sessions = new Map<string, RuntimeSession>()
-  private readonly events = new Map<string, RuntimeStreamEvent[]>()
-
-  get(sessionId: string): RuntimeSession | undefined {
-    return this.sessions.get(sessionId)
-  }
-
-  put(session: RuntimeSession): void {
-    this.sessions.set(session.id, session)
-  }
-
-  appendEvent(sessionId: string, event: RuntimeStreamEvent): void {
-    const existing = this.events.get(sessionId) ?? []
-    existing.push(event)
-    this.events.set(sessionId, existing)
-  }
-
-  listEvents(sessionId: string): RuntimeStreamEvent[] {
-    return [...(this.events.get(sessionId) ?? [])]
-  }
-}
-
-export async function runAgentTask<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult>(
-  options: RunAgentTaskOptions<TState, TAction, TActionResult, TEval>,
-): Promise<AgentTaskRunResult<TState, TAction, TActionResult, TEval>> {
-  const task = options.task
-  await emit(options.onEvent, { type: 'task_start', task })
-  await emit(options.onEvent, { type: 'readiness_start', task })
-  let knowledge = await buildReadiness(task, options.knowledge)
-  await emit(options.onEvent, { type: 'readiness_end', task, knowledge })
-  const questions = userQuestionsForKnowledgeGaps(knowledge.blockingMissingRequirements)
-  const acquisitionPlans = acquisitionPlansForKnowledgeGaps([
-    ...knowledge.blockingMissingRequirements,
-    ...knowledge.nonBlockingGaps,
-  ])
-  const preflight = await runKnowledgePreflight(task, questions, acquisitionPlans, options.knowledge, options.onEvent)
-  if (options.knowledge?.refreshReadiness && (Object.keys(preflight.userAnswers).length > 0 || preflight.acquiredEvidenceIds.length > 0)) {
-    await emit(options.onEvent, { type: 'readiness_start', task })
-    knowledge = await options.knowledge.refreshReadiness({
-      task,
-      previous: knowledge,
-      userAnswers: preflight.userAnswers,
-      acquiredEvidenceIds: preflight.acquiredEvidenceIds,
-    })
-    await emit(options.onEvent, { type: 'readiness_end', task, knowledge })
-  }
-
-  await emit(options.onEvent, { type: 'control_start', task, knowledge })
-  const scenarioId = options.scenarioId ?? task.id
-  const control = await runAgentControlLoop<TState, TAction, TActionResult, TEval>({
-    intent: task.intent,
-    budget: task.budget,
-    signal: options.signal,
-    store: options.store,
-    scenarioId,
-    projectId: options.projectId,
-    variantId: options.variantId,
-    observe: ({ history, abortSignal }) => options.adapter.observe({ task, knowledge, history, abortSignal }),
-    validate: async ({ state, history, abortSignal }) => {
-      const readinessEval = blockingKnowledgeEval(knowledge, { minimumScore: options.minimumReadinessScore })
-      const evals = await options.adapter.validate({ task, knowledge, state, history, abortSignal })
-      return [readinessEval as TEval, ...evals]
-    },
-    decide: (ctx) => {
-      if (isKnowledgeBlocked(ctx.evals)) {
-        return options.adapter.onKnowledgeBlocked?.({ task, knowledge, questions, acquisitionPlans }) ?? {
-          type: 'stop',
-          pass: false,
-          score: knowledge.readinessScore,
-          reason: `knowledge readiness blocked: ${knowledge.reason}`,
-        }
-      }
-      return options.adapter.decide(toAgentContext(task, knowledge, ctx))
-    },
-    act: (action, ctx) => options.adapter.act(action, toAgentContext(task, knowledge, ctx)),
-    shouldStop: options.adapter.shouldStop
-      ? (ctx) => options.adapter.shouldStop!(toAgentContext(task, knowledge, ctx))
-      : undefined,
-    getActionCostUsd: options.adapter.getActionCostUsd
-      ? ({ action, result, state, evals, history }) => options.adapter.getActionCostUsd!({ action, result, task, state, evals, history })
-      : undefined,
-    onStep: (step) => emit(options.onEvent, { type: 'control_step', task, step }),
-  })
-  await emit(options.onEvent, { type: 'control_end', task, control })
-  const status = statusFromControl(control)
-  await emit(options.onEvent, { type: 'task_end', task, status, reason: control.reason })
-
-  return {
-    task,
-    status,
-    knowledge,
-    questions,
-    acquisitionPlans,
-    userAnswers: preflight.userAnswers,
-    acquiredEvidenceIds: preflight.acquiredEvidenceIds,
-    control,
-    runRecords: (options.adapter.projectRunRecords?.(control, task) ?? []).map((record) => (
-      record.scenarioId === undefined ? { ...record, scenarioId } : record
-    )),
-  }
-}
-
-export function summarizeAgentTaskRun<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
-  result: AgentTaskRunResult<TState, TAction, TActionResult, TEval>,
-): AgentTaskRunSummary {
-  return {
-    taskId: result.task.id,
-    domain: result.task.domain,
-    status: result.status,
-    reason: result.control.reason,
-    readinessStatus: decideKnowledgeReadiness(result.knowledge).status,
-    readinessScore: result.knowledge.readinessScore,
-    recommendedAction: result.knowledge.recommendedAction,
-    blockingGapIds: result.knowledge.blockingMissingRequirements.map((requirement) => requirement.id),
-    nonBlockingGapIds: result.knowledge.nonBlockingGaps.map((requirement) => requirement.id),
-    questionCount: result.questions.length,
-    acquisitionPlanCount: result.acquisitionPlans.length,
-    acquiredEvidenceCount: result.acquiredEvidenceIds.length,
-    controlStepCount: result.control.steps.length,
-    pass: result.control.pass,
-    failureClass: result.control.failureClass,
-    wallMs: result.control.wallMs,
-    costUsd: result.control.spentCostUsd,
-  }
-}
-
-export async function* runAgentTaskStream<TInput extends AgentBackendInput = AgentBackendInput>(
-  options: RunAgentTaskStreamOptions<TInput>,
-): AsyncIterable<RuntimeStreamEvent> {
-  const task = options.task
-  const input = { task, ...(options.input ?? {}) } as TInput
-  const started = streamEvent({ type: 'task_start', task })
-  yield started
-
-  const readinessStart = streamEvent({ type: 'readiness_start', task })
-  yield readinessStart
-  let knowledge = await buildReadiness(task, options.knowledge)
-  const questions = userQuestionsForKnowledgeGaps(knowledge.blockingMissingRequirements)
-  const acquisitionPlans = acquisitionPlansForKnowledgeGaps([
-    ...knowledge.blockingMissingRequirements,
-    ...knowledge.nonBlockingGaps,
-  ])
-  const preflight = await runKnowledgePreflightStream(task, questions, acquisitionPlans, options.knowledge)
-  for (const event of preflight.events) yield event
-  if (options.knowledge?.refreshReadiness && (Object.keys(preflight.userAnswers).length > 0 || preflight.acquiredEvidenceIds.length > 0)) {
-    yield streamEvent({ type: 'readiness_start', task })
-    knowledge = await options.knowledge.refreshReadiness({
-      task,
-      previous: knowledge,
-      userAnswers: preflight.userAnswers,
-      acquiredEvidenceIds: preflight.acquiredEvidenceIds,
-    })
-  }
-  const decision = decideKnowledgeReadiness(knowledge, { minimumScore: options.minimumReadinessScore })
-  yield streamEvent({ type: 'readiness_end', task, knowledge, decision })
-  if (!decision.passed && decision.status === 'blocked') {
-    const reason = `knowledge readiness blocked: ${decision.reason}`
-    yield streamEvent({ type: 'task_end', task, status: 'blocked', reason })
-    yield streamEvent({ type: 'final', task, status: 'blocked', reason })
-    return
-  }
-
-  const store = options.sessionStore
-  const existing = options.sessionId ? await store?.get(options.sessionId) : undefined
-  const shouldResume = Boolean(options.resume && existing)
-  let session = shouldResume && existing
-    ? await resumeBackendSession(options.backend, existing, input, { task, knowledge, signal: options.signal })
-    : await startBackendSession(options.backend, input, { task, knowledge, signal: options.signal }, options.sessionId)
-  await store?.put(session)
-  const sessionEvent = streamEvent({
-    type: shouldResume ? 'session_resumed' : 'session_created',
-    task,
-    session,
-  })
-  await store?.appendEvent?.(session.id, sessionEvent)
-  yield sessionEvent
-
-  const backendStart = streamEvent({ type: 'backend_start', task, session, backend: options.backend.kind })
-  await store?.appendEvent?.(session.id, backendStart)
-  yield backendStart
-
-  let finalText = ''
-  try {
-    for await (const rawEvent of options.backend.stream(input, { task, knowledge, session, signal: options.signal })) {
-      const event = normalizeBackendStreamEvent(rawEvent, task, session)
-      if (event.type === 'text_delta') finalText += event.text
-      await store?.appendEvent?.(session.id, event)
-      yield event
-    }
-    const completedStatus: AgentTaskStatus = 'completed'
-    session = touchSession({ ...session, status: completedStatus })
-    await store?.put(session)
-    const backendEnd = streamEvent({ type: 'backend_end', task, session, backend: options.backend.kind })
-    await store?.appendEvent?.(session.id, backendEnd)
-    yield backendEnd
-    const reason = 'backend completed'
-    const taskEnd = streamEvent({ type: 'task_end', task, status: completedStatus, reason })
-    await store?.appendEvent?.(session.id, taskEnd)
-    yield taskEnd
-    const final = streamEvent({ type: 'final', task, session, status: completedStatus, reason, text: finalText || undefined })
-    await store?.appendEvent?.(session.id, final)
-    yield final
-  } catch (err) {
-    const message = err instanceof Error ? err.message : String(err)
-    session = touchSession({ ...session, status: options.signal?.aborted ? 'aborted' : 'failed' })
-    await store?.put(session)
-    let stopErrorMessage: string | undefined
-    try {
-      await options.backend.stop?.(session, message)
-    } catch (stopErr) {
-      stopErrorMessage = stopErr instanceof Error ? stopErr.message : String(stopErr)
-    }
-    const backendError = streamEvent({
-      type: 'backend_error',
-      task,
-      session,
-      backend: options.backend.kind,
-      message: stopErrorMessage ? `${message}; backend stop failed: ${stopErrorMessage}` : message,
-      recoverable: !options.signal?.aborted,
-    })
-    await store?.appendEvent?.(session.id, backendError)
-    yield backendError
-    const status: AgentTaskStatus = options.signal?.aborted ? 'aborted' : 'failed'
-    const taskEnd = streamEvent({ type: 'task_end', task, status, reason: message })
-    await store?.appendEvent?.(session.id, taskEnd)
-    yield taskEnd
-    const final = streamEvent({ type: 'final', task, session, status, reason: message, text: finalText || undefined })
-    await store?.appendEvent?.(session.id, final)
-    yield final
-  }
-}
-
-export function decideKnowledgeReadiness(
-  report: KnowledgeReadinessReport,
-  options: { minimumScore?: number } = {},
-): KnowledgeReadinessDecision {
-  const minimumScore = options.minimumScore ?? 0.7
-  const blockingGapIds = report.blockingMissingRequirements.map((requirement) => requirement.id)
-  const nonBlockingGapIds = report.nonBlockingGaps.map((requirement) => requirement.id)
-  if (blockingGapIds.length > 0) {
-    return {
-      passed: false,
-      status: 'blocked',
-      reason: report.reason,
-      readinessScore: report.readinessScore,
-      recommendedAction: report.recommendedAction,
-      severity: report.severity,
-      blockingGapIds,
-      nonBlockingGapIds,
-    }
-  }
-  if (report.readinessScore < minimumScore) {
-    return {
-      passed: false,
-      status: 'caveat',
-      reason: `Knowledge readiness score ${report.readinessScore.toFixed(3)} is below minimum ${minimumScore.toFixed(3)}.`,
-      readinessScore: report.readinessScore,
-      recommendedAction: report.recommendedAction,
-      severity: report.severity,
-      blockingGapIds,
-      nonBlockingGapIds,
-    }
-  }
-  return {
-    passed: true,
-    status: 'ready',
-    reason: report.reason,
-    readinessScore: report.readinessScore,
-    recommendedAction: report.recommendedAction,
-    severity: report.severity,
-    blockingGapIds,
-    nonBlockingGapIds,
-  }
-}
-
-export function sanitizeKnowledgeReadinessReport(
-  report: KnowledgeReadinessReport,
-  options: RuntimeTelemetryOptions = {},
-): SanitizedKnowledgeReadinessReport {
-  return {
-    taskId: report.taskId,
-    readinessScore: report.readinessScore,
-    recommendedAction: report.recommendedAction,
-    severity: report.severity,
-    reason: report.reason,
-    blockingMissingRequirements: report.blockingMissingRequirements.map((requirement) =>
-      sanitizeKnowledgeRequirement(requirement, options),
-    ),
-    nonBlockingGaps: report.nonBlockingGaps.map((requirement) =>
-      sanitizeKnowledgeRequirement(requirement, options),
-    ),
-    evidenceCount: report.bundle.evidenceIds.length,
-    evidenceIds: options.includeEvidenceIds ? report.bundle.evidenceIds : undefined,
-    missingRequirementIds: report.bundle.missing.map((requirement) => requirement.id),
-  }
-}
-
-export function sanitizeAgentRuntimeEvent<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
-  event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>,
-  options: RuntimeTelemetryOptions = {},
-): Record<string, unknown> {
-  const base = { type: event.type, task: sanitizeTask(event.task, options) }
-  if (event.type === 'readiness_start' || event.type === 'task_start' || event.type === 'control_start') {
-    return event.type === 'control_start'
-      ? { ...base, knowledge: sanitizeKnowledgeReadinessReport(event.knowledge, options) }
-      : base
-  }
-  if (event.type === 'readiness_end') {
-    return { ...base, knowledge: sanitizeKnowledgeReadinessReport(event.knowledge, options) }
-  }
-  if (event.type === 'questions_start') {
-    return { ...base, questions: event.questions.map((question) => sanitizeQuestion(question, options)) }
-  }
-  if (event.type === 'questions_end') {
-    return {
-      ...base,
-      questions: event.questions.map((question) => sanitizeQuestion(question, options)),
-      userAnswers: options.includeUserAnswers ? event.userAnswers : redactRecord(event.userAnswers),
-    }
-  }
-  if (event.type === 'acquisition_start') {
-    return { ...base, acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan) }
-  }
-  if (event.type === 'acquisition_end') {
-    return {
-      ...base,
-      acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan),
-      acquiredEvidenceCount: event.acquiredEvidenceIds.length,
-      acquiredEvidenceIds: options.includeEvidenceIds ? event.acquiredEvidenceIds : undefined,
-    }
-  }
-  if (event.type === 'control_step') {
-    return { ...base, step: sanitizeControlStep(event.step, options) }
-  }
-  if (event.type === 'control_end') {
-    return { ...base, control: sanitizeControlRun(event.control, options) }
-  }
-  return { ...base, status: event.status, reason: event.reason }
-}
-
-export function sanitizeRuntimeStreamEvent(
-  event: RuntimeStreamEvent,
-  options: RuntimeTelemetryOptions = {},
-): Record<string, unknown> {
-  const withTask = 'task' in event && event.task
-    ? { task: sanitizeTask(event.task, options) }
-    : {}
-  const withSession = 'session' in event && event.session
-    ? { session: sanitizeRuntimeSession(event.session, options) }
-    : {}
-
-  if (event.type === 'readiness_end') {
-    return {
-      type: event.type,
-      ...withTask,
-      timestamp: event.timestamp,
-      decision: event.decision,
-      knowledge: sanitizeKnowledgeReadinessReport(event.knowledge, options),
-    }
-  }
-  if (event.type === 'questions_start') {
-    return { type: event.type, ...withTask, timestamp: event.timestamp, questions: event.questions.map((question) => sanitizeQuestion(question, options)) }
-  }
-  if (event.type === 'questions_end') {
-    return {
-      type: event.type,
-      ...withTask,
-      timestamp: event.timestamp,
-      questions: event.questions.map((question) => sanitizeQuestion(question, options)),
-      userAnswers: options.includeUserAnswers ? event.userAnswers : redactRecord(event.userAnswers),
-    }
-  }
-  if (event.type === 'acquisition_start') {
-    return { type: event.type, ...withTask, timestamp: event.timestamp, acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan) }
-  }
-  if (event.type === 'acquisition_end') {
-    return {
-      type: event.type,
-      ...withTask,
-      timestamp: event.timestamp,
-      acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan),
-      acquiredEvidenceCount: event.acquiredEvidenceIds.length,
-      acquiredEvidenceIds: options.includeEvidenceIds ? event.acquiredEvidenceIds : undefined,
-    }
-  }
-  if (event.type === 'tool_call') {
-    return {
-      type: event.type,
-      ...withTask,
-      ...withSession,
-      timestamp: event.timestamp,
-      toolName: event.toolName,
-      toolCallId: event.toolCallId,
-      args: options.includeControlPayloads ? event.args : undefined,
-    }
-  }
-  if (event.type === 'tool_result') {
-    return {
-      type: event.type,
-      ...withTask,
-      ...withSession,
-      timestamp: event.timestamp,
-      toolName: event.toolName,
-      toolCallId: event.toolCallId,
-      result: options.includeControlPayloads ? event.result : undefined,
-    }
-  }
-  if (event.type === 'artifact') {
-    return {
-      type: event.type,
-      ...withTask,
-      ...withSession,
-      timestamp: event.timestamp,
-      artifactId: event.artifactId,
-      name: event.name,
-      mimeType: event.mimeType,
-      uri: options.includeEvidenceIds ? event.uri : undefined,
-      metadata: options.includeMetadata ? event.metadata : undefined,
-    }
-  }
-  if (event.type === 'final') {
-    return {
-      type: event.type,
-      ...withTask,
-      ...withSession,
-      timestamp: event.timestamp,
-      status: event.status,
-      reason: event.reason,
-      text: options.includeControlPayloads ? event.text : undefined,
-      metadata: options.includeMetadata ? event.metadata : undefined,
-    }
-  }
-  return {
-    type: event.type,
-    ...withTask,
-    ...withSession,
-    timestamp: 'timestamp' in event ? event.timestamp : undefined,
-    ...pickPublicStreamFields(event),
-  }
-}
-
-export function createRuntimeEventCollector<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult>(
-  options: RuntimeTelemetryOptions = {},
-): RuntimeEventCollector<TState, TAction, TActionResult, TEval> {
-  const events: Array<Record<string, unknown>> = []
-  return {
-    events,
-    onEvent: (event) => {
-      events.push(sanitizeAgentRuntimeEvent(event, options))
-    },
-  }
-}
-
 /**
- * Streaming-event counterpart of `createRuntimeEventCollector`. Use this with
- * `runAgentTaskStream` — pass each yielded event through `onEvent` and read
- * the sanitized copies off `events`. The same `RuntimeTelemetryOptions`
- * redaction flags apply.
+ * @tangle-network/agent-runtime
+ *
+ * Reusable runtime lifecycle for domain-specific agents. Standardizes the
+ * task lifecycle (knowledge readiness → questions / acquisition → control
+ * loop → eval) and delegates domain behavior to an adapter. Owns no domain
+ * policy, models, tools, connectors, or UI.
  *
- * Stream and non-stream events have different field shapes (timestamps,
- * sessions, text/tool deltas) so this is a sibling factory rather than an
- * overload of `createRuntimeEventCollector`; the unified-union alternative
- * was rejected because dispatching on `type` alone would silently misroute
- * events whose `type` literals overlap (`task_start`, `readiness_end`, etc.).
+ * See `docs/concepts.md` (mental model) and `README.md` (quickstart). Every
+ * public export below carries a `@stable` or `@experimental` tag; treat
+ * `@experimental` exports as subject to change within this minor version.
  */
-export function createRuntimeStreamEventCollector(
-  options: RuntimeTelemetryOptions = {},
-): RuntimeStreamEventCollector {
-  const events: Array<Record<string, unknown>> = []
-  const eventCountsByType: Record<string, number> = {}
-  let firstSessionId: string | undefined
-  let finalStatus: AgentTaskStatus | undefined
-  let finalReason: string | undefined
-  let finalText = ''
-  return {
-    events,
-    onEvent: (event) => {
-      events.push(sanitizeRuntimeStreamEvent(event, options))
-      eventCountsByType[event.type] = (eventCountsByType[event.type] ?? 0) + 1
-      if (event.type === 'text_delta') finalText += event.text
-      if (!firstSessionId && (event.type === 'session_created' || event.type === 'session_resumed')) {
-        firstSessionId = event.session.id
-      }
-      if (event.type === 'final') {
-        finalStatus = event.status
-        finalReason = event.reason
-      }
-    },
-    summary() {
-      return {
-        eventCount: events.length,
-        eventCountsByType: { ...eventCountsByType },
-        firstSessionId,
-        finalStatus,
-        finalReason,
-        finalText,
-      }
-    },
-  }
-}
-
-export function encodeServerSentEvent(
-  data: unknown,
-  options: ServerSentEventOptions = {},
-): string {
-  const lines: string[] = []
-  if (options.id) lines.push(`id: ${stripNewlines(options.id)}`)
-  if (options.event) lines.push(`event: ${stripNewlines(options.event)}`)
-  if (typeof options.retry === 'number' && Number.isFinite(options.retry) && options.retry >= 0) {
-    lines.push(`retry: ${Math.floor(options.retry)}`)
-  }
-
-  const payload = typeof data === 'string' ? data : JSON.stringify(data)
-  for (const line of payload.split(/\r?\n/)) {
-    lines.push(`data: ${line}`)
-  }
-  return `${lines.join('\n')}\n\n`
-}
-
-export function readinessServerSentEvent(
-  report: KnowledgeReadinessReport,
-  options: RuntimeTelemetryOptions & ServerSentEventOptions = {},
-): string {
-  const { event, id, retry, ...telemetryOptions } = options
-  return encodeServerSentEvent({
-    type: 'readiness',
-    readiness: sanitizeKnowledgeReadinessReport(report, telemetryOptions),
-  }, { event, id, retry })
-}
-
-export function runtimeStreamServerSentEvent(
-  event: RuntimeStreamEvent,
-  options: RuntimeTelemetryOptions & ServerSentEventOptions = {},
-): string {
-  const { event: sseEvent, id, retry, ...telemetryOptions } = options
-  return encodeServerSentEvent(sanitizeRuntimeStreamEvent(event, telemetryOptions), { event: sseEvent, id, retry })
-}
-
-export function createIterableBackend<TInput extends AgentBackendInput>(
-  options: {
-    kind: string
-    start?: AgentExecutionBackend<TInput>['start']
-    resume?: AgentExecutionBackend<TInput>['resume']
-    stream: AgentExecutionBackend<TInput>['stream']
-    stop?: AgentExecutionBackend<TInput>['stop']
-  },
-): AgentExecutionBackend<TInput> {
-  return options
-}
-
-export function createSandboxPromptBackend<TBox, TInput extends AgentBackendInput = AgentBackendInput>(
-  options: {
-    kind?: string
-    getBox(input: TInput, context: Omit<AgentBackendContext, 'session'>): Promise<TBox> | TBox
-    streamPrompt(box: TBox, message: string, context: AgentBackendContext): AsyncIterable<unknown>
-    mapEvent?: (event: unknown, context: AgentBackendContext) => RuntimeStreamEvent | undefined
-    getSessionId?: (box: TBox, input: TInput) => string | undefined
-  },
-): AgentExecutionBackend<TInput> {
-  return {
-    kind: options.kind ?? 'sandbox',
-    async start(input, context) {
-      const box = await options.getBox(input, context)
-      return newRuntimeSession(options.kind ?? 'sandbox', options.getSessionId?.(box, input) ?? context.requestedSessionId, {
-        resumable: true,
-      })
-    },
-    resume(session) {
-      return touchSession({ ...session, status: 'active' })
-    },
-    async *stream(input, context) {
-      const box = await options.getBox(input, context)
-      const message = input.message ?? input.messages?.at(-1)?.content ?? context.task.intent
-      for await (const event of options.streamPrompt(box, message, context)) {
-        const mapped = options.mapEvent?.(event, context) ?? mapCommonBackendEvent(event, context)
-        if (mapped) yield mapped
-      }
-    },
-  }
-}
-
-export function createOpenAICompatibleBackend<TInput extends AgentBackendInput = AgentBackendInput>(
-  options: {
-    apiKey: string
-    baseUrl: string
-    model: string
-    kind?: string
-    fetchImpl?: typeof fetch
-  },
-): AgentExecutionBackend<TInput> {
-  const fetcher = options.fetchImpl ?? fetch
-  return {
-    kind: options.kind ?? 'tcloud',
-    start(_input, context) {
-      return newRuntimeSession(options.kind ?? 'tcloud', context.requestedSessionId)
-    },
-    async *stream(input, context) {
-      const response = await fetcher(`${options.baseUrl.replace(/\/$/, '')}/chat/completions`, {
-        method: 'POST',
-        headers: {
-          Authorization: `Bearer ${options.apiKey}`,
-          'Content-Type': 'application/json',
-        },
-        body: JSON.stringify({
-          model: options.model,
-          stream: true,
-          messages: input.messages ?? [{ role: 'user', content: input.message ?? context.task.intent }],
-        }),
-        signal: context.signal,
-      })
-      if (!response.ok) throw new Error(`chat backend returned ${response.status}`)
-      yield* streamResponseEvents(response, context)
-    },
-  }
-}
-
-async function runKnowledgePreflight<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
-  task: AgentTaskSpec,
-  questions: UserQuestion[],
-  acquisitionPlans: DataAcquisitionPlan[],
-  provider: AgentKnowledgeProvider | undefined,
-  onEvent: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval> | undefined,
-): Promise<{ userAnswers: Record<string, string>; acquiredEvidenceIds: string[] }> {
-  let userAnswers: Record<string, string> = {}
-  let acquiredEvidenceIds: string[] = []
-  if (questions.length > 0 && provider?.answerQuestions) {
-    await emit(onEvent, { type: 'questions_start', task, questions })
-    userAnswers = await provider.answerQuestions(questions, task)
-    await emit(onEvent, { type: 'questions_end', task, questions, userAnswers })
-  }
-  if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
-    await emit(onEvent, { type: 'acquisition_start', task, acquisitionPlans })
-    acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task)
-    await emit(onEvent, { type: 'acquisition_end', task, acquisitionPlans, acquiredEvidenceIds })
-  }
-  return { userAnswers, acquiredEvidenceIds }
-}
-
-async function runKnowledgePreflightStream(
-  task: AgentTaskSpec,
-  questions: UserQuestion[],
-  acquisitionPlans: DataAcquisitionPlan[],
-  provider: AgentKnowledgeProvider | undefined,
-): Promise<{
-  userAnswers: Record<string, string>
-  acquiredEvidenceIds: string[]
-  events: RuntimeStreamEvent[]
-}> {
-  const events: RuntimeStreamEvent[] = []
-  let userAnswers: Record<string, string> = {}
-  let acquiredEvidenceIds: string[] = []
-  if (questions.length > 0 && provider?.answerQuestions) {
-    events.push(streamEvent({ type: 'questions_start', task, questions }))
-    userAnswers = await provider.answerQuestions(questions, task)
-    events.push(streamEvent({ type: 'questions_end', task, questions, userAnswers }))
-  }
-  if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
-    events.push(streamEvent({ type: 'acquisition_start', task, acquisitionPlans }))
-    acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task)
-    events.push(streamEvent({ type: 'acquisition_end', task, acquisitionPlans, acquiredEvidenceIds }))
-  }
-  return { userAnswers, acquiredEvidenceIds, events }
-}
-
-function sanitizeTask(task: AgentTaskSpec, options: RuntimeTelemetryOptions): Record<string, unknown> {
-  return {
-    id: task.id,
-    intent: task.intent,
-    domain: task.domain,
-    inputs: options.includeInputs ? task.inputs : task.inputs ? '[redacted]' : undefined,
-    requiredKnowledge: task.requiredKnowledge?.map((requirement) =>
-      sanitizeKnowledgeRequirement(requirement, options),
-    ),
-    metadata: options.includeMetadata ? task.metadata : task.metadata ? '[redacted]' : undefined,
-  }
-}
-
-function sanitizeRuntimeSession(session: RuntimeSession, options: RuntimeTelemetryOptions): Record<string, unknown> {
-  return {
-    id: session.id,
-    backend: session.backend,
-    status: session.status,
-    hasResumeToken: Boolean(session.resumeToken),
-    createdAt: session.createdAt,
-    updatedAt: session.updatedAt,
-    metadata: options.includeMetadata ? session.metadata : session.metadata ? '[redacted]' : undefined,
-  }
-}
-
-function sanitizeKnowledgeRequirement(
-  requirement: KnowledgeRequirement,
-  options: RuntimeTelemetryOptions,
-): SanitizedKnowledgeRequirement {
-  const includeDescription = options.includeRequirementDescriptions && requirement.sensitivity !== 'secret'
-  return {
-    id: requirement.id,
-    description: includeDescription ? requirement.description : undefined,
-    requiredFor: requirement.requiredFor,
-    category: requirement.category,
-    acquisitionMode: requirement.acquisitionMode,
-    importance: requirement.importance,
-    freshness: requirement.freshness,
-    sensitivity: requirement.sensitivity,
-    confidenceNeeded: requirement.confidenceNeeded,
-    currentConfidence: requirement.currentConfidence,
-    evidenceCount: requirement.evidenceIds.length,
-    evidenceIds: options.includeEvidenceIds ? requirement.evidenceIds : undefined,
-    fallbackPolicy: requirement.fallbackPolicy,
-  }
-}
-
-function sanitizeQuestion(question: UserQuestion, options: RuntimeTelemetryOptions): Record<string, unknown> {
-  return {
-    id: question.id,
-    question: options.includeRequirementDescriptions && question.answerType !== 'credential'
-      ? question.question
-      : undefined,
-    reason: options.includeRequirementDescriptions ? question.reason : undefined,
-    requirementId: question.requirementId,
-    importance: question.importance,
-    answerType: question.answerType,
-    impactIfUnknown: options.includeRequirementDescriptions ? question.impactIfUnknown : undefined,
-    optionCount: question.options?.length ?? 0,
-  }
-}
-
-function sanitizeAcquisitionPlan(plan: DataAcquisitionPlan): Record<string, unknown> {
-  return {
-    id: plan.id,
-    requirementIds: plan.requirementIds,
-    mode: plan.mode,
-    priority: plan.priority,
-    expectedEvidenceCount: plan.expectedEvidenceIds?.length ?? 0,
-    questionCount: plan.questions?.length ?? 0,
-  }
-}
-
-function sanitizeControlStep<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
-  step: ControlStep<TState, TAction, TActionResult, TEval>,
-  options: RuntimeTelemetryOptions,
-): Record<string, unknown> {
-  const actionOutcome = step.actionOutcome
-  return {
-    index: step.index,
-    decisionType: step.decision.type,
-    reason: step.decision.reason,
-    action: options.includeControlPayloads && step.decision.type === 'continue' ? step.decision.action : undefined,
-    result: options.includeControlPayloads && actionOutcome?.ok ? actionOutcome.result : undefined,
-    actionOk: actionOutcome?.ok,
-    actionError: actionOutcome?.ok === false ? actionOutcome.error : undefined,
-    durationMs: actionOutcome?.durationMs,
-    evalsBefore: summarizeEvals(step.evalsBefore, options),
-    evalsAfter: summarizeEvals(step.evalsAfter, options),
-    startedAt: step.startedAt,
-    endedAt: step.endedAt,
-  }
-}
-
-function sanitizeControlRun<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
-  control: ControlRunResult<TState, TAction, TActionResult, TEval>,
-  options: RuntimeTelemetryOptions,
-): Record<string, unknown> {
-  return {
-    pass: control.pass,
-    completed: control.completed,
-    reason: control.reason,
-    score: control.score,
-    stepCount: control.steps.length,
-    wallMs: control.wallMs,
-    spentCostUsd: control.spentCostUsd,
-    failureClass: control.failureClass,
-    stoppedBy: control.stoppedBy,
-    runId: control.runId,
-    runtimeErrorCount: control.runtimeErrors.length,
-    finalEvals: summarizeEvals(control.finalEvals, options),
-  }
-}
-
-function summarizeEvals(evals: ControlEvalResult[], options: RuntimeTelemetryOptions): Array<Record<string, unknown>> {
-  return evals.map((evalResult) => ({
-    id: evalResult.id,
-    passed: evalResult.passed,
-    score: evalResult.score,
-    severity: evalResult.severity,
-    objective: evalResult.objective,
-    detail: options.includeEvalDetails ? evalResult.detail : undefined,
-    evidence: options.includeEvalDetails ? evalResult.evidence : undefined,
-  }))
-}
-
-function redactRecord(record: Record<string, string>): Record<string, string> {
-  return Object.fromEntries(Object.keys(record).map((key) => [key, '[redacted]']))
-}
-
-function stripNewlines(value: string): string {
-  return value.replace(/[\r\n]/g, ' ')
-}
-
-function timestamp(): string {
-  return new Date().toISOString()
-}
-
-function streamEvent<T extends Omit<RuntimeStreamEvent, 'timestamp'>>(event: T): T & { timestamp: string } {
-  return { ...event, timestamp: timestamp() }
-}
-
-function newRuntimeSession(backend: string, requestedId?: string, metadata?: Record<string, unknown>): RuntimeSession {
-  const now = timestamp()
-  return {
-    id: requestedId || crypto.randomUUID(),
-    backend,
-    status: 'active',
-    createdAt: now,
-    updatedAt: now,
-    metadata,
-  }
-}
-
-function touchSession(session: RuntimeSession): RuntimeSession {
-  return { ...session, updatedAt: timestamp() }
-}
-
-async function startBackendSession<TInput extends AgentBackendInput>(
-  backend: AgentExecutionBackend<TInput>,
-  input: TInput,
-  context: Omit<AgentBackendContext, 'session'>,
-  requestedSessionId?: string,
-): Promise<RuntimeSession> {
-  if (backend.start) return backend.start(input, { ...context, requestedSessionId })
-  return newRuntimeSession(backend.kind, requestedSessionId)
-}
-
-async function resumeBackendSession<TInput extends AgentBackendInput>(
-  backend: AgentExecutionBackend<TInput>,
-  session: RuntimeSession,
-  input: TInput,
-  context: Omit<AgentBackendContext, 'session'>,
-): Promise<RuntimeSession> {
-  if (session.backend !== backend.kind) {
-    throw new Error(`Cannot resume ${session.backend} session with ${backend.kind} backend`)
-  }
-  if (backend.resume) return backend.resume(session, input, context)
-  return touchSession({ ...session, status: 'active' })
-}
-
-function normalizeBackendStreamEvent(event: RuntimeStreamEvent, task: AgentTaskSpec, session: RuntimeSession): RuntimeStreamEvent {
-  if ('task' in event && event.task && 'session' in event && event.session && 'timestamp' in event && event.timestamp) return event
-  return {
-    ...event,
-    task: 'task' in event && event.task ? event.task : task,
-    session: 'session' in event && event.session ? event.session : session,
-    timestamp: 'timestamp' in event && event.timestamp ? event.timestamp : timestamp(),
-  } as RuntimeStreamEvent
-}
-
-function pickPublicStreamFields(event: RuntimeStreamEvent): Record<string, unknown> {
-  if (event.type === 'session_created' || event.type === 'session_resumed') return {}
-  if (event.type === 'backend_start' || event.type === 'backend_end') return { backend: event.backend }
-  if (event.type === 'backend_error') return { backend: event.backend, message: event.message, recoverable: event.recoverable }
-  if (event.type === 'task_end') return { status: event.status, reason: event.reason }
-  if (event.type === 'text_delta' || event.type === 'reasoning_delta') return { text: event.text }
-  return {}
-}
-
-function mapCommonBackendEvent(event: unknown, context: AgentBackendContext): RuntimeStreamEvent | undefined {
-  if (!event || typeof event !== 'object') return undefined
-  const record = event as Record<string, unknown>
-  const type = String(record.type ?? '')
-  const data = record.data && typeof record.data === 'object' ? record.data as Record<string, unknown> : record
-  if (type === 'message.part.updated' || type === 'text_delta' || type === 'delta') {
-    const text = stringValue(data.text) ?? stringValue(data.delta) ?? stringValue(record.text)
-    return text ? { type: 'text_delta', task: context.task, session: context.session, text, timestamp: timestamp() } : undefined
-  }
-  if (type === 'reasoning_delta') {
-    const text = stringValue(data.text) ?? stringValue(record.text)
-    return text ? { type: 'reasoning_delta', task: context.task, session: context.session, text, timestamp: timestamp() } : undefined
-  }
-  if (type === 'tool_call') {
-    return {
-      type: 'tool_call',
-      task: context.task,
-      session: context.session,
-      toolName: stringValue(data.name) ?? stringValue(record.toolName) ?? 'tool',
-      toolCallId: stringValue(data.id) ?? stringValue(record.toolCallId),
-      args: data.args ?? data.input ?? record.args,
-      timestamp: timestamp(),
-    }
-  }
-  if (type === 'tool_result') {
-    return {
-      type: 'tool_result',
-      task: context.task,
-      session: context.session,
-      toolName: stringValue(data.name) ?? stringValue(record.toolName) ?? 'tool',
-      toolCallId: stringValue(data.id) ?? stringValue(record.toolCallId),
-      result: data.result ?? data.output ?? record.result,
-      timestamp: timestamp(),
-    }
-  }
-  if (type === 'result' || type === 'final') {
-    const text = stringValue(data.finalText) ?? stringValue(data.text) ?? stringValue(record.text)
-    return text ? { type: 'text_delta', task: context.task, session: context.session, text, timestamp: timestamp() } : undefined
-  }
-  return undefined
-}
-
-async function* streamResponseEvents(response: Response, context: AgentBackendContext): AsyncIterable<RuntimeStreamEvent> {
-  const body = response.body
-  if (!body) return
-  const reader = body.getReader()
-  const decoder = new TextDecoder()
-  let buffer = ''
-  for (;;) {
-    const { done, value } = await reader.read()
-    if (done) break
-    buffer += decoder.decode(value, { stream: true }).replace(/\r\n/g, '\n')
-    for (const event of drainStreamBuffer(false)) yield event
-  }
-  buffer += decoder.decode().replace(/\r\n/g, '\n')
-  for (const event of drainStreamBuffer(true)) yield event
-  if (buffer.trim()) {
-    const event = parseStreamChunk(buffer, context)
-    if (event) yield event
-  }
-
-  function* drainStreamBuffer(flush: boolean): Iterable<RuntimeStreamEvent> {
-    for (;;) {
-      const sseBoundary = buffer.indexOf('\n\n')
-      if (sseBoundary >= 0) {
-        const chunk = buffer.slice(0, sseBoundary)
-        buffer = buffer.slice(sseBoundary + 2)
-        const event = parseStreamChunk(chunk, context)
-        if (event) yield event
-        continue
-      }
-
-      const newline = buffer.indexOf('\n')
-      if (newline >= 0 && !buffer.slice(0, newline).startsWith('data:')) {
-        const line = buffer.slice(0, newline)
-        buffer = buffer.slice(newline + 1)
-        const event = parseStreamChunk(line, context)
-        if (event) yield event
-        continue
-      }
-
-      if (flush && buffer.trim() && !buffer.trimStart().startsWith('data:')) {
-        const line = buffer
-        buffer = ''
-        const event = parseStreamChunk(line, context)
-        if (event) yield event
-        continue
-      }
-
-      break
-    }
-  }
-}
-
-function parseStreamChunk(chunk: string, context: AgentBackendContext): RuntimeStreamEvent | undefined {
-  const lines = chunk.split(/\r?\n/)
-  const dataLines = lines.filter((line) => line.startsWith('data:'))
-  const data = dataLines.length > 0
-    ? dataLines.map((line) => line.slice(5).trimStart()).join('\n')
-    : chunk.trim()
-  if (!data || data === '[DONE]') return undefined
-  try {
-    const parsed = JSON.parse(data) as Record<string, unknown>
-    const choice = Array.isArray(parsed.choices) ? parsed.choices[0] as Record<string, unknown> | undefined : undefined
-    const delta = choice?.delta as Record<string, unknown> | undefined
-    const message = choice?.message as Record<string, unknown> | undefined
-    const text = stringValue(delta?.content) ?? stringValue(message?.content) ?? stringValue(parsed.text)
-    if (text) return { type: 'text_delta', task: context.task, session: context.session, text, timestamp: timestamp() }
-    return mapCommonBackendEvent(parsed, context)
-  } catch {
-    return { type: 'text_delta', task: context.task, session: context.session, text: data, timestamp: timestamp() }
-  }
-}
-
-function stringValue(value: unknown): string | undefined {
-  return typeof value === 'string' && value.length > 0 ? value : undefined
-}
-
-function buildReadiness(
-  task: AgentTaskSpec,
-  provider: AgentKnowledgeProvider | undefined,
-): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport {
-  if (provider?.buildReadiness) return provider.buildReadiness(task)
-  return scoreKnowledgeReadiness({
-    taskId: task.id,
-    requirements: task.requiredKnowledge ?? [],
-    metadata: { domain: task.domain, ...task.metadata },
-  })
-}
-
-function isKnowledgeBlocked(evals: ControlEvalResult[]): boolean {
-  return evals.some((evalResult) => evalResult.id === 'knowledge-ready' && !evalResult.passed)
-}
-
-function statusFromControl(control: ControlRunResult<unknown, unknown, unknown, ControlEvalResult>): AgentTaskStatus {
-  if (control.stoppedBy === 'abort') return 'aborted'
-  if (control.reason.includes('knowledge readiness blocked')) return 'blocked'
-  if (control.pass) return 'completed'
-  return 'failed'
-}
-
-async function emit<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
-  sink: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval> | undefined,
-  event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>,
-): Promise<void> {
-  await sink?.(event)
-}
-
-function toAgentContext<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
-  task: AgentTaskSpec,
-  knowledge: KnowledgeReadinessReport,
-  ctx: ControlContext<TState, TAction, TActionResult, TEval>,
-): AgentTaskContext<TState, TAction, TActionResult, TEval> {
-  return {
-    task,
-    knowledge,
-    state: ctx.state,
-    evals: ctx.evals,
-    history: ctx.history,
-    budget: ctx.budget,
-    stepIndex: ctx.stepIndex,
-    wallMs: ctx.wallMs,
-    spentCostUsd: ctx.spentCostUsd,
-    remainingCostUsd: ctx.remainingCostUsd,
-    abortSignal: ctx.abortSignal,
-  }
-}
 
+// ── Re-exports from @tangle-network/agent-eval (compat surface) ──────
 export type {
   ControlBudget,
   ControlDecision,
@@ -1386,3 +24,89 @@ export type {
   RunRecord,
   UserQuestion,
 } from '@tangle-network/agent-eval'
+// ── Backends ──────────────────────────────────────────────────────────
+export {
+  createIterableBackend,
+  createOpenAICompatibleBackend,
+  createSandboxPromptBackend,
+} from './backends'
+// ── Errors ───────────────────────────────────────────────────────────
+export {
+  AgentEvalError,
+  type AgentEvalErrorCode,
+  BackendTransportError,
+  CaptureIntegrityError,
+  ConfigError,
+  JudgeError,
+  NotFoundError,
+  ReplayError,
+  RuntimeRunStateError,
+  SessionMismatchError,
+  ValidationError,
+  VerificationError,
+} from './errors'
+// ── Readiness ─────────────────────────────────────────────────────────
+export { decideKnowledgeReadiness } from './readiness'
+// ── Run loop ─────────────────────────────────────────────────────────
+export { runAgentTask, runAgentTaskStream, summarizeAgentTaskRun } from './run'
+export type {
+  RuntimeRunCompleteInput,
+  RuntimeRunCost,
+  RuntimeRunHandle,
+  RuntimeRunOptions,
+  RuntimeRunPersistenceAdapter,
+  RuntimeRunRow,
+  RuntimeRunStatus,
+} from './runtime-run'
+// ── Production run lifecycle (new in 0.7.0) ──────────────────────────
+export { startRuntimeRun } from './runtime-run'
+export type {
+  RuntimeEventCollector,
+  RuntimeStreamEventCollector,
+  RuntimeStreamEventSink,
+  RuntimeStreamEventSummary,
+  RuntimeTelemetryOptions,
+  SanitizedKnowledgeReadinessReport,
+  SanitizedKnowledgeRequirement,
+} from './sanitize'
+// ── Sanitization / telemetry ─────────────────────────────────────────
+export {
+  createRuntimeEventCollector,
+  createRuntimeStreamEventCollector,
+  sanitizeAgentRuntimeEvent,
+  sanitizeKnowledgeReadinessReport,
+  sanitizeRuntimeStreamEvent,
+} from './sanitize'
+// ── Sessions ──────────────────────────────────────────────────────────
+export { InMemoryRuntimeSessionStore } from './sessions'
+export type { ServerSentEventOptions } from './sse'
+// ── SSE ───────────────────────────────────────────────────────────────
+export {
+  encodeServerSentEvent,
+  readinessServerSentEvent,
+  runtimeStreamServerSentEvent,
+} from './sse'
+export type { TraceBridge, TraceBridgeOptions } from './trace-bridge'
+// ── agent-eval trace bridge (new in 0.7.0) ───────────────────────────
+export { createTraceBridge, toAgentEvalTrace } from './trace-bridge'
+// ── Core types ───────────────────────────────────────────────────────
+export type {
+  AgentAdapter,
+  AgentBackendContext,
+  AgentBackendInput,
+  AgentExecutionBackend,
+  AgentKnowledgeProvider,
+  AgentRuntimeEvent,
+  AgentRuntimeEventSink,
+  AgentTaskContext,
+  AgentTaskRunResult,
+  AgentTaskRunSummary,
+  AgentTaskSpec,
+  AgentTaskStatus,
+  KnowledgeReadinessDecision,
+  RunAgentTaskOptions,
+  RunAgentTaskStreamOptions,
+  RuntimeSession,
+  RuntimeSessionStore,
+  RuntimeStreamEvent,
+} from './types'
diff --git a/src/readiness.ts b/src/readiness.ts
new file mode 100644
index 0000000..1e1a4fe
--- /dev/null
+++ b/src/readiness.ts
@@ -0,0 +1,68 @@
+/**
+ * @stable
+ *
+ * Pure readiness-decision helper. Maps a `KnowledgeReadinessReport` from
+ * `@tangle-network/agent-eval` to a three-state branch (`ready` / `blocked` /
+ * `caveat`) the runtime, route handlers, and UI shells can all switch on.
+ *
+ * Default `minimumScore` of 0.7 mirrors the readiness scoring scale in
+ * agent-eval; callers tightening or loosening this should keep it consistent
+ * across all entry points for the same product so the UI / metrics agree on
+ * what "caveat" means.
+ */
+
+import type { KnowledgeReadinessReport } from '@tangle-network/agent-eval'
+
+import { ValidationError } from './errors'
+import type { KnowledgeReadinessDecision } from './types'
+
+const DEFAULT_MINIMUM_READINESS_SCORE = 0.7
+
+/**
+ * @stable
+ *
+ * Classifies a readiness report as `blocked`, `caveat`, or `ready`.
+ *
+ * - `blocked`: the report lists at least one blocking missing requirement.
+ * - `caveat`: no blocking gaps, but the score is below `minimumScore` or is
+ *   not a finite number (a `NaN` score must not slip through the gate).
+ * - `ready`: no blocking gaps and a finite score at or above `minimumScore`.
+ *
+ * @param report - Readiness report to classify.
+ * @param options - `minimumScore` threshold in [0, 1]; defaults to 0.7.
+ * @returns The decision, with the report's score, action, severity and gap ids.
+ * @throws ValidationError when `minimumScore` is not a finite number in [0, 1].
+ */
+export function decideKnowledgeReadiness(
+  report: KnowledgeReadinessReport,
+  options: { minimumScore?: number } = {},
+): KnowledgeReadinessDecision {
+  const minimumScore = options.minimumScore ?? DEFAULT_MINIMUM_READINESS_SCORE
+  if (!Number.isFinite(minimumScore) || minimumScore < 0 || minimumScore > 1) {
+    throw new ValidationError(
+      `minimumScore must be a finite number in [0, 1]; received ${String(minimumScore)}`,
+    )
+  }
+  const score = report.readinessScore
+  // Carried unchanged into every branch, in the same key order as before.
+  const shared = {
+    readinessScore: score,
+    recommendedAction: report.recommendedAction,
+    severity: report.severity,
+    blockingGapIds: report.blockingMissingRequirements.map((requirement) => requirement.id),
+    nonBlockingGapIds: report.nonBlockingGaps.map((requirement) => requirement.id),
+  }
+  if (shared.blockingGapIds.length > 0) {
+    return { passed: false, status: 'blocked', reason: report.reason, ...shared }
+  }
+  if (!Number.isFinite(score)) {
+    // `NaN < minimumScore` is false, so an unusable score used to read as `ready`.
+    const reason = `Knowledge readiness score ${String(score)} is not a finite number.`
+    return { passed: false, status: 'caveat', reason, ...shared }
+  }
+  if (score < minimumScore) {
+    const reason = `Knowledge readiness score ${score.toFixed(3)} is below minimum ${minimumScore.toFixed(3)}.`
+    return { passed: false, status: 'caveat', reason, ...shared }
+  }
+  return { passed: true, status: 'ready', reason: report.reason, ...shared }
+}
diff --git a/src/run.ts b/src/run.ts
new file mode 100644
index 0000000..6b1271a
--- /dev/null
+++ b/src/run.ts
@@ -0,0 +1,484 @@
+/**
+ * @stable
+ *
+ * The two top-level entry points:
+ *
+ *  - `runAgentTask` — single-shot lifecycle for adapter-driven tasks.
+ *  - `runAgentTaskStream` — streaming lifecycle that delegates execution to an
+ *    `AgentExecutionBackend` (model API, sandbox, or custom iterable).
+ *
+ * Both gate the run on `KnowledgeReadinessReport` from `agent-eval`, emit the
+ * same lifecycle event vocabulary (under different shapes — see `types.ts`),
+ * and route session lifecycle through a pluggable `RuntimeSessionStore`.
+ */
+
+import {
+  acquisitionPlansForKnowledgeGaps,
+  blockingKnowledgeEval,
+  type ControlContext,
+  type ControlEvalResult,
+  type ControlRunResult,
+  type DataAcquisitionPlan,
+  type KnowledgeReadinessReport,
+  runAgentControlLoop,
+  scoreKnowledgeReadiness,
+  type UserQuestion,
+  userQuestionsForKnowledgeGaps,
+} from '@tangle-network/agent-eval'
+
+import { normalizeBackendStreamEvent } from './backends'
+import { SessionMismatchError } from './errors'
+import { decideKnowledgeReadiness } from './readiness'
+import { newRuntimeSession, nowIso, touchSession } from './sessions'
+import type {
+  AgentBackendInput,
+  AgentExecutionBackend,
+  AgentKnowledgeProvider,
+  AgentRuntimeEventSink,
+  AgentTaskContext,
+  AgentTaskRunResult,
+  AgentTaskRunSummary,
+  AgentTaskSpec,
+  AgentTaskStatus,
+  RunAgentTaskOptions,
+  RunAgentTaskStreamOptions,
+  RuntimeSession,
+  RuntimeStreamEvent,
+} from './types'
+
+/** Runs one adapter-driven task: readiness scoring, preflight hooks, the control loop, then the result. @stable */
+export async function runAgentTask<
+  TState,
+  TAction,
+  TActionResult,
+  TEval extends ControlEvalResult = ControlEvalResult,
+>(
+  options: RunAgentTaskOptions<TState, TAction, TActionResult, TEval>,
+): Promise<AgentTaskRunResult<TState, TAction, TActionResult, TEval>> {
+  const task = options.task
+  await emit(options.onEvent, { type: 'task_start', task })
+  await emit(options.onEvent, { type: 'readiness_start', task })
+  let knowledge = await buildReadiness(task, options.knowledge)
+  await emit(options.onEvent, { type: 'readiness_end', task, knowledge })
+  const questions = userQuestionsForKnowledgeGaps(knowledge.blockingMissingRequirements) // blocking gaps only
+  const acquisitionPlans = acquisitionPlansForKnowledgeGaps([
+    ...knowledge.blockingMissingRequirements,
+    ...knowledge.nonBlockingGaps,
+  ])
+  const preflight = await runKnowledgePreflight(
+    task,
+    questions,
+    acquisitionPlans,
+    options.knowledge,
+    options.onEvent,
+  )
+  if (
+    options.knowledge?.refreshReadiness &&
+    (Object.keys(preflight.userAnswers).length > 0 || preflight.acquiredEvidenceIds.length > 0)
+  ) {
+    await emit(options.onEvent, { type: 'readiness_start', task })
+    knowledge = await options.knowledge.refreshReadiness({
+      task,
+      previous: knowledge,
+      userAnswers: preflight.userAnswers,
+      acquiredEvidenceIds: preflight.acquiredEvidenceIds,
+    }) // questions and acquisitionPlans keep their pre-refresh values
+    await emit(options.onEvent, { type: 'readiness_end', task, knowledge })
+  }
+
+  await emit(options.onEvent, { type: 'control_start', task, knowledge })
+  const scenarioId = options.scenarioId ?? task.id // the task id doubles as the default scenario id
+  const control = await runAgentControlLoop<TState, TAction, TActionResult, TEval>({
+    intent: task.intent,
+    budget: task.budget,
+    signal: options.signal,
+    store: options.store,
+    scenarioId,
+    projectId: options.projectId,
+    variantId: options.variantId,
+    observe: ({ history, abortSignal }) =>
+      options.adapter.observe({ task, knowledge, history, abortSignal }),
+    validate: async ({ state, history, abortSignal }) => {
+      const readinessEval = blockingKnowledgeEval(knowledge, {
+        minimumScore: options.minimumReadinessScore,
+      })
+      const evals = await options.adapter.validate({
+        task,
+        knowledge,
+        state,
+        history,
+        abortSignal,
+      })
+      return [readinessEval as TEval, ...evals] // the readiness eval always comes first
+    },
+    decide: (ctx) => {
+      if (isKnowledgeBlocked(ctx.evals)) {
+        return (
+          options.adapter.onKnowledgeBlocked?.({
+            task,
+            knowledge,
+            questions,
+            acquisitionPlans,
+          }) ?? {
+            type: 'stop',
+            pass: false,
+            score: knowledge.readinessScore,
+            reason: `knowledge readiness blocked: ${knowledge.reason}`,
+          }
+        )
+      }
+      return options.adapter.decide(toAgentContext(task, knowledge, ctx))
+    },
+    act: (action, ctx) => options.adapter.act(action, toAgentContext(task, knowledge, ctx)),
+    shouldStop: options.adapter.shouldStop
+      ? (ctx) => options.adapter.shouldStop!(toAgentContext(task, knowledge, ctx))
+      : undefined,
+    getActionCostUsd: options.adapter.getActionCostUsd
+      ? ({ action, result, state, evals, history }) =>
+          options.adapter.getActionCostUsd!({ action, result, task, state, evals, history })
+      : undefined,
+    onStep: (step) => emit(options.onEvent, { type: 'control_step', task, step }),
+  })
+  await emit(options.onEvent, { type: 'control_end', task, control })
+  const status = statusFromControl(control)
+  await emit(options.onEvent, { type: 'task_end', task, status, reason: control.reason })
+
+  return {
+    task,
+    status,
+    knowledge,
+    questions,
+    acquisitionPlans,
+    userAnswers: preflight.userAnswers,
+    acquiredEvidenceIds: preflight.acquiredEvidenceIds,
+    control,
+    runRecords: (options.adapter.projectRunRecords?.(control, task) ?? []).map((record) =>
+      record.scenarioId === undefined ? { ...record, scenarioId } : record, // backfill missing scenario ids
+    ),
+  }
+}
+
+/** Condenses a run result into scalar fields, gap ids, and counts; readiness status uses the default minimum score. @stable */
+export function summarizeAgentTaskRun<
+  TState,
+  TAction,
+  TActionResult,
+  TEval extends ControlEvalResult,
+>(result: AgentTaskRunResult<TState, TAction, TActionResult, TEval>): AgentTaskRunSummary {
+  const { task, knowledge, control } = result
+  const readinessStatus = decideKnowledgeReadiness(knowledge).status
+  return {
+    taskId: task.id,
+    domain: task.domain,
+    status: result.status,
+    reason: control.reason,
+    readinessStatus,
+    readinessScore: knowledge.readinessScore,
+    recommendedAction: knowledge.recommendedAction,
+    blockingGapIds: knowledge.blockingMissingRequirements.map((gap) => gap.id),
+    nonBlockingGapIds: knowledge.nonBlockingGaps.map((gap) => gap.id),
+    questionCount: result.questions.length,
+    acquisitionPlanCount: result.acquisitionPlans.length,
+    acquiredEvidenceCount: result.acquiredEvidenceIds.length,
+    controlStepCount: control.steps.length,
+    pass: control.pass,
+    failureClass: control.failureClass,
+    wallMs: control.wallMs,
+    costUsd: control.spentCostUsd,
+  }
+}
+
+/** Streams one backend-driven run: readiness gate, session start/resume, backend events, then task_end and final. @stable */
+export async function* runAgentTaskStream<TInput extends AgentBackendInput = AgentBackendInput>(
+  options: RunAgentTaskStreamOptions<TInput>,
+): AsyncIterable<RuntimeStreamEvent> {
+  const task = options.task
+  const input = { task, ...(options.input ?? {}) } as TInput // keys in options.input override the default task
+  yield streamEvent({ type: 'task_start', task })
+
+  yield streamEvent({ type: 'readiness_start', task })
+  let knowledge = await buildReadiness(task, options.knowledge)
+  const questions = userQuestionsForKnowledgeGaps(knowledge.blockingMissingRequirements)
+  const acquisitionPlans = acquisitionPlansForKnowledgeGaps([
+    ...knowledge.blockingMissingRequirements,
+    ...knowledge.nonBlockingGaps,
+  ])
+  const preflight = await runKnowledgePreflightStream(
+    task,
+    questions,
+    acquisitionPlans,
+    options.knowledge,
+  )
+  for (const event of preflight.events) yield event // buffered: yielded only after the preflight call resolves
+  if (
+    options.knowledge?.refreshReadiness &&
+    (Object.keys(preflight.userAnswers).length > 0 || preflight.acquiredEvidenceIds.length > 0)
+  ) {
+    yield streamEvent({ type: 'readiness_start', task }) // NOTE(review): second start with no readiness_end before it — confirm consumers tolerate this
+    knowledge = await options.knowledge.refreshReadiness({
+      task,
+      previous: knowledge,
+      userAnswers: preflight.userAnswers,
+      acquiredEvidenceIds: preflight.acquiredEvidenceIds,
+    })
+  }
+  const decision = decideKnowledgeReadiness(knowledge, {
+    minimumScore: options.minimumReadinessScore,
+  })
+  yield streamEvent({ type: 'readiness_end', task, knowledge, decision }) // only a 'blocked' decision stops the run below
+  if (!decision.passed && decision.status === 'blocked') {
+    const reason = `knowledge readiness blocked: ${decision.reason}`
+    yield streamEvent({ type: 'task_end', task, status: 'blocked', reason })
+    yield streamEvent({ type: 'final', task, status: 'blocked', reason })
+    return // blocked runs end here, before any session is started or stored
+  }
+
+  const store = options.sessionStore
+  const existing = options.sessionId ? await store?.get(options.sessionId) : undefined
+  const shouldResume = Boolean(options.resume && existing) // no stored session: a new one is started instead
+  let session =
+    shouldResume && existing
+      ? await resumeBackendSession(options.backend, existing, input, {
+          task,
+          knowledge,
+          signal: options.signal,
+        })
+      : await startBackendSession(
+          options.backend,
+          input,
+          { task, knowledge, signal: options.signal },
+          options.sessionId,
+        )
+  await store?.put(session)
+  const sessionEvent = streamEvent({
+    type: shouldResume ? 'session_resumed' : 'session_created',
+    task,
+    session,
+  })
+  await store?.appendEvent?.(session.id, sessionEvent)
+  yield sessionEvent
+
+  const backendStart = streamEvent({
+    type: 'backend_start',
+    task,
+    session,
+    backend: options.backend.kind,
+  })
+  await store?.appendEvent?.(session.id, backendStart)
+  yield backendStart
+
+  let finalText = ''
+  try {
+    for await (const rawEvent of options.backend.stream(input, {
+      task,
+      knowledge,
+      session,
+      signal: options.signal,
+    })) {
+      const event = normalizeBackendStreamEvent(rawEvent, task, session)
+      if (event.type === 'text_delta') finalText += event.text // accumulated for the 'final' event
+      await store?.appendEvent?.(session.id, event)
+      yield event
+    }
+    const completedStatus: AgentTaskStatus = 'completed'
+    session = touchSession({ ...session, status: completedStatus })
+    await store?.put(session)
+    const backendEnd = streamEvent({
+      type: 'backend_end',
+      task,
+      session,
+      backend: options.backend.kind,
+    })
+    await store?.appendEvent?.(session.id, backendEnd)
+    yield backendEnd
+    const reason = 'backend completed'
+    const taskEnd = streamEvent({ type: 'task_end', task, status: completedStatus, reason })
+    await store?.appendEvent?.(session.id, taskEnd)
+    yield taskEnd
+    const final = streamEvent({
+      type: 'final',
+      task,
+      session,
+      status: completedStatus,
+      reason,
+      text: finalText || undefined,
+    })
+    await store?.appendEvent?.(session.id, final)
+    yield final
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err)
+    session = touchSession({ ...session, status: options.signal?.aborted ? 'aborted' : 'failed' })
+    await store?.put(session)
+    let stopErrorMessage: string | undefined
+    try {
+      await options.backend.stop?.(session, message) // best effort: a stop failure is folded into the error message
+    } catch (stopErr) {
+      stopErrorMessage = stopErr instanceof Error ? stopErr.message : String(stopErr)
+    }
+    const backendError = streamEvent({
+      type: 'backend_error',
+      task,
+      session,
+      backend: options.backend.kind,
+      message: stopErrorMessage ? `${message}; backend stop failed: ${stopErrorMessage}` : message,
+      recoverable: !options.signal?.aborted, // aborts are reported as non-recoverable
+    })
+    await store?.appendEvent?.(session.id, backendError)
+    yield backendError
+    const status: AgentTaskStatus = options.signal?.aborted ? 'aborted' : 'failed'
+    const taskEnd = streamEvent({ type: 'task_end', task, status, reason: message })
+    await store?.appendEvent?.(session.id, taskEnd)
+    yield taskEnd
+    const final = streamEvent({
+      type: 'final',
+      task,
+      session,
+      status,
+      reason: message,
+      text: finalText || undefined,
+    })
+    await store?.appendEvent?.(session.id, final)
+    yield final
+  }
+}
+
+/**
+ * Runs the provider's optional question and acquisition hooks, in that order,
+ * emitting a start/end event pair around each hook that actually runs.
+ */
+async function runKnowledgePreflight<
+  TState,
+  TAction,
+  TActionResult,
+  TEval extends ControlEvalResult,
+>(
+  task: AgentTaskSpec,
+  questions: UserQuestion[],
+  acquisitionPlans: DataAcquisitionPlan[],
+  provider: AgentKnowledgeProvider | undefined,
+  onEvent: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval> | undefined,
+): Promise<{ userAnswers: Record<string, string>; acquiredEvidenceIds: string[] }> {
+  let answers: Record<string, string> = {}
+  if (questions.length > 0 && provider?.answerQuestions) {
+    await emit(onEvent, { type: 'questions_start', task, questions })
+    answers = await provider.answerQuestions(questions, task)
+    await emit(onEvent, { type: 'questions_end', task, questions, userAnswers: answers })
+  }
+  // A hook is skipped, and its result stays empty, when it is absent or has no work.
+  let acquiredEvidenceIds: string[] = []
+  if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
+    await emit(onEvent, { type: 'acquisition_start', task, acquisitionPlans })
+    acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task)
+    await emit(onEvent, { type: 'acquisition_end', task, acquisitionPlans, acquiredEvidenceIds })
+  }
+  return { userAnswers: answers, acquiredEvidenceIds }
+}
+
+/** Stream variant of the preflight: runs the same hooks but returns the events instead of emitting them. */
+async function runKnowledgePreflightStream(
+  task: AgentTaskSpec,
+  questions: UserQuestion[],
+  acquisitionPlans: DataAcquisitionPlan[],
+  provider: AgentKnowledgeProvider | undefined,
+): Promise<{
+  userAnswers: Record<string, string>
+  acquiredEvidenceIds: string[]
+  events: RuntimeStreamEvent[]
+}> {
+  // Each event is stamped when it is built here, not when the caller later yields it.
+  const out: RuntimeStreamEvent[] = []
+  let answers: Record<string, string> = {}
+  let acquiredEvidenceIds: string[] = []
+  if (questions.length > 0 && provider?.answerQuestions) {
+    out.push(streamEvent({ type: 'questions_start', task, questions }))
+    answers = await provider.answerQuestions(questions, task)
+    out.push(streamEvent({ type: 'questions_end', task, questions, userAnswers: answers }))
+  }
+  if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
+    out.push(streamEvent({ type: 'acquisition_start', task, acquisitionPlans }))
+    acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task)
+    out.push(streamEvent({ type: 'acquisition_end', task, acquisitionPlans, acquiredEvidenceIds }))
+  }
+  return { userAnswers: answers, acquiredEvidenceIds, events: out }
+}
+
+function streamEvent<T extends Omit<RuntimeStreamEvent, 'timestamp'>>(
+  event: T,
+): T & { timestamp: string } {
+  return { ...event, timestamp: nowIso() } // shallow copy of the event plus a timestamp taken at call time
+}
+
+async function startBackendSession<TInput extends AgentBackendInput>(
+  backend: AgentExecutionBackend<TInput>,
+  input: TInput,
+  context: { task: AgentTaskSpec; knowledge: KnowledgeReadinessReport; signal?: AbortSignal },
+  requestedSessionId?: string,
+): Promise<RuntimeSession> {
+  if (!backend.start) return newRuntimeSession(backend.kind, requestedSessionId) // no start hook on the backend
+  return backend.start(input, { ...context, requestedSessionId }) // the backend creates the session itself
+}
+
+async function resumeBackendSession<TInput extends AgentBackendInput>(
+  backend: AgentExecutionBackend<TInput>,
+  session: RuntimeSession,
+  input: TInput,
+  context: { task: AgentTaskSpec; knowledge: KnowledgeReadinessReport; signal?: AbortSignal },
+): Promise<RuntimeSession> {
+  if (session.backend !== backend.kind) {
+    throw new SessionMismatchError(session.backend, backend.kind) // a session only resumes on its own backend kind
+  }
+  if (!backend.resume) return touchSession({ ...session, status: 'active' }) // no resume hook: mark active again
+  return backend.resume(session, input, context)
+}
+
+/** Uses the provider's `buildReadiness` when present; otherwise scores the task's own requirements. */
+function buildReadiness(
+  task: AgentTaskSpec,
+  provider: AgentKnowledgeProvider | undefined,
+): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport {
+  if (provider?.buildReadiness) return provider.buildReadiness(task)
+  const requirements = task.requiredKnowledge ?? []
+  // Spread last, so a `domain` key inside task.metadata overrides task.domain.
+  const metadata = { domain: task.domain, ...task.metadata }
+  return scoreKnowledgeReadiness({ taskId: task.id, requirements, metadata })
+}
+
+function isKnowledgeBlocked(evals: ControlEvalResult[]): boolean {
+  return evals.some(({ id, passed }) => id === 'knowledge-ready' && !passed) // true when a 'knowledge-ready' eval failed
+}
+
+/** Derives the task status from a control result; checks run in order: abort, blocked reason, pass. */
+function statusFromControl(
+  control: ControlRunResult<unknown, unknown, unknown, ControlEvalResult>,
+): AgentTaskStatus {
+  if (control.stoppedBy === 'abort') return 'aborted'
+  if (control.reason.includes('knowledge readiness blocked')) return 'blocked'
+  return control.pass ? 'completed' : 'failed'
+}
+
+async function emit<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
+  sink: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval> | undefined,
+  event: Parameters<AgentRuntimeEventSink<TState, TAction, TActionResult, TEval>>[0],
+): Promise<void> {
+  await sink?.(event) // skipped when no sink is set; a sink's rejection propagates to the caller
+}
+
+/**
+ * Builds the adapter-facing context for a control-loop callback.
+ *
+ * Combines the run's `task` and `knowledge` with the loop fields copied by
+ * name from `ctx`, so each adapter hook receives a single flat object.
+ */
+function toAgentContext<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
+  task: AgentTaskSpec,
+  knowledge: KnowledgeReadinessReport,
+  ctx: ControlContext<TState, TAction, TActionResult, TEval>,
+): AgentTaskContext<TState, TAction, TActionResult, TEval> {
+  const { state, evals, history, budget, stepIndex } = ctx
+  const { wallMs, spentCostUsd, remainingCostUsd, abortSignal } = ctx
+  // Only the fields named above are forwarded; anything else on ctx is dropped.
+  // Spreading the two groups in this order keeps the original key sequence.
+  const loopState = { state, evals, history, budget, stepIndex }
+  const runMeta = { wallMs, spentCostUsd, remainingCostUsd, abortSignal }
+  return { task, knowledge, ...loopState, ...runMeta }
+}
diff --git a/src/runtime-run.ts b/src/runtime-run.ts
new file mode 100644
index 0000000..fe904ea
--- /dev/null
+++ b/src/runtime-run.ts
@@ -0,0 +1,286 @@
+/**
+ * @stable
+ *
+ * Canonical production-run lifecycle. ONE abstraction for "the agent did a
+ * thing on behalf of a customer; record what it did, what it cost, and how it
+ * ended." Consumer agents (legal, tax, gtm, creative, agent-builder) reach for
+ * `startRuntimeRun` instead of inventing their own `agentRuns`-row helpers.
+ *
+ * Three concerns live in this module:
+ *
+ *  1. **Lifecycle state machine** — `running` -> `completed | failed | cancelled`,
+ *     enforced by `RuntimeRunStateError`. Completion is idempotent (a second
+ *     `complete()` call with the same status is a no-op so retries / cleanup
+ *     paths don't double-fire side effects). A different terminal status is a
+ *     state error.
+ *
+ *  2. **Cost ledger** — every `llm_call` event the handle observes contributes
+ *     `tokensIn`, `tokensOut`, `costUsd`, and bumps `llmCalls`. Wall time is
+ *     measured from `startRuntimeRun()` to `complete()`. Surface via
+ *     `handle.cost()` for "cost per customer task" dashboards.
+ *
+ *  3. **Persistence adapter** — `RuntimeRunPersistenceAdapter` is the seam
+ *     consumers plug in to write a `RuntimeRunRow` to their D1 / postgres /
+ *     KV store. The adapter receives only the row shape (ids, status, cost,
+ *     timestamps); no telemetry payload bytes flow through it unless the
+ *     consumer puts them in the row's `metadata` itself.
+ *
+ * The pattern replaces legal-agent's bespoke `completeProductionAgentRun` /
+ * `persistRuntimeRun` pair from `eval-evidence.ts` + `api.chat.ts`. Both are
+ * marked `@deprecated` in this release; consumers ditch them on their own
+ * version bumps.
+ */
+
+import { RuntimeRunStateError, ValidationError } from './errors'
+import type { AgentTaskSpec, RuntimeStreamEvent } from './types'
+
+/** @stable Lifecycle states of a run; `running` is the only non-terminal value. */
+export type RuntimeRunStatus = 'running' | 'completed' | 'failed' | 'cancelled'
+
+/** @stable */
+export interface RuntimeRunCost {
+  /** Sum of the finite numeric `tokensIn` values across every observed `llm_call` event. */
+  tokensIn: number
+  /** Sum of the finite numeric `tokensOut` values across every observed `llm_call` event. */
+  tokensOut: number
+  /** Sum of the finite numeric `costUsd` values across every observed `llm_call` event. */
+  costUsd: number
+  /** Wall time from `startRuntimeRun()` to `complete()` (or `now()` if not yet completed). */
+  wallMs: number
+  /** Count of `llm_call` events observed during the run, with or without usable usage fields. */
+  llmCalls: number
+}
+
+/** @stable Argument to `RuntimeRunHandle.complete()`. */
+export interface RuntimeRunCompleteInput {
+  status: Exclude<RuntimeRunStatus, 'running'> // `'running'` is also rejected at runtime
+  resultSummary?: string // copied verbatim onto the row
+  /** Optional explicit cost override; if omitted, the accumulated ledger is used. */
+  cost?: Partial<RuntimeRunCost>
+  /** Error message stored on the row; meant for `status === 'failed'` but not enforced. */
+  error?: string
+  /** Extra fields for the row's `metadata`; keys passed to `toRow()`/`persist()` win on clashes. */
+  metadata?: Record<string, unknown>
+}
+
+/** @stable Flat row shape handed to the persistence adapter. */
+export interface RuntimeRunRow {
+  /** Stable runtime-side identifier. Adapters may translate to their own primary key. */
+  id: string
+  workspaceId: string
+  sessionId?: string
+  agentId?: string
+  domain?: string // copied from `taskSpec.domain`
+  taskId: string // copied from `taskSpec.id`
+  scenarioId?: string
+  status: RuntimeRunStatus
+  resultSummary?: string
+  error?: string
+  cost: RuntimeRunCost // snapshot taken when the row is built
+  startedAt: string // ISO-8601 timestamp
+  completedAt?: string // ISO-8601 timestamp; undefined until `complete()` has run
+  metadata?: Record<string, unknown> // completion metadata merged with per-call metadata
+}
+
+/** @stable */
+export interface RuntimeRunPersistenceAdapter {
+  /**
+   * Called each time `handle.persist()` runs on a completed run. Implementations
+   * write `row` to their durable store (D1, postgres, KV). Any value the
+   * implementation returns is ignored: `persist()` resolves to `void`. Errors
+   * thrown here propagate out of `persist()` so the caller can decide whether
+   * to retry or log-and-continue.
+   */
+  upsert(row: RuntimeRunRow): Promise<void> | void
+}
+
+/** @stable */
+export interface RuntimeRunOptions {
+  workspaceId: string // must be non-empty; `startRuntimeRun` throws ValidationError otherwise
+  sessionId?: string
+  agentId?: string
+  taskSpec: AgentTaskSpec // `taskSpec.id` must be non-empty
+  scenarioId?: string
+  /** Optional persistence adapter; if omitted, `persist()` writes nothing once the run is complete. */
+  adapter?: RuntimeRunPersistenceAdapter
+  /** Override the row id; default = `${taskSpec.id}:${random suffix}`. */
+  id?: string
+  /** Override the clock (epoch milliseconds); default = `Date.now`. Useful for deterministic tests. */
+  now?: () => number
+}
+
+/** @stable */
+export interface RuntimeRunHandle {
+  /** Stable id assigned at start. */
+  readonly id: string
+  readonly workspaceId: string
+  readonly sessionId: string | undefined
+  readonly taskSpec: AgentTaskSpec
+  readonly status: RuntimeRunStatus // live view of the current lifecycle state
+
+  /**
+   * Observe a single `RuntimeStreamEvent`. Every event type other than
+   * `llm_call` is ignored, so consumers can pipe the whole stream through
+   * `handle.observe`. Events observed after `complete()` still update the ledger.
+   */
+  observe(event: RuntimeStreamEvent): void
+
+  /** Snapshot of the current cost ledger. Safe to call at any time. */
+  cost(): RuntimeRunCost
+
+  /**
+   * Transition to a terminal state. Idempotent for the same status; throws
+   * `RuntimeRunStateError` for a different terminal status (state machines
+   * don't time-travel) and `ValidationError` when asked to complete as `running`.
+   */
+  complete(input: RuntimeRunCompleteInput): void
+
+  /** Build the current row without writing it; `metadata` is merged over completion metadata. */
+  toRow(metadata?: Record<string, unknown>): RuntimeRunRow
+
+  /**
+   * Persist the current row via the configured adapter. Rejects with
+   * `RuntimeRunStateError` while the run is still `running`; resolves without
+   * writing when no adapter is configured.
+   */
+  persist(metadata?: Record<string, unknown>): Promise<void>
+}
+
+/**
+ * @stable
+ *
+ * Construct a runtime-run handle. The returned handle is mutable across its
+ * lifetime; consumers should not share it across requests.
+ *
+ * @throws ValidationError when `workspaceId` or `taskSpec.id` is missing or empty.
+ */
+export function startRuntimeRun(options: RuntimeRunOptions): RuntimeRunHandle {
+  if (!options.workspaceId) {
+    throw new ValidationError('startRuntimeRun: workspaceId is required')
+  }
+  if (!options.taskSpec?.id) {
+    throw new ValidationError('startRuntimeRun: taskSpec.id is required')
+  }
+  const now = options.now ?? Date.now
+  const startedAtMs = now()
+  const startedAt = new Date(startedAtMs).toISOString()
+  const id = options.id ?? `${options.taskSpec.id}:${randomSuffix()}`
+
+  let status: RuntimeRunStatus = 'running'
+  let completedAtMs: number | undefined
+  let resultSummary: string | undefined
+  let error: string | undefined
+  let completionMetadata: Record<string, unknown> | undefined
+  // Explicit wall-time override from `complete({ cost: { wallMs } })`; unset means "use the clock".
+  let wallMsOverride: number | undefined
+
+  // Running totals. `wallMs` is never accumulated here: `snapshotCost` derives it on demand.
+  const ledger: RuntimeRunCost = {
+    tokensIn: 0,
+    tokensOut: 0,
+    costUsd: 0,
+    wallMs: 0,
+    llmCalls: 0,
+  }
+
+  const isFiniteNumber = (value: unknown): value is number =>
+    typeof value === 'number' && Number.isFinite(value)
+
+  const snapshotCost = (): RuntimeRunCost => ({
+    tokensIn: ledger.tokensIn,
+    tokensOut: ledger.tokensOut,
+    costUsd: ledger.costUsd,
+    // An explicit override wins; otherwise elapsed time, frozen once `complete()` has run.
+    wallMs: wallMsOverride ?? (completedAtMs ?? now()) - startedAtMs,
+    llmCalls: ledger.llmCalls,
+  })
+
+  const buildRow = (extraMetadata?: Record<string, unknown>): RuntimeRunRow => ({
+    id,
+    workspaceId: options.workspaceId,
+    sessionId: options.sessionId,
+    agentId: options.agentId,
+    domain: options.taskSpec.domain,
+    taskId: options.taskSpec.id,
+    scenarioId: options.scenarioId,
+    status,
+    resultSummary,
+    error,
+    cost: snapshotCost(),
+    startedAt,
+    completedAt: completedAtMs !== undefined ? new Date(completedAtMs).toISOString() : undefined,
+    metadata: mergeMetadata(completionMetadata, extraMetadata),
+  })
+
+  return {
+    id,
+    workspaceId: options.workspaceId,
+    sessionId: options.sessionId,
+    taskSpec: options.taskSpec,
+    get status() {
+      return status
+    },
+    observe(event) {
+      if (event.type !== 'llm_call') return
+      // Every llm_call counts, even when its usage fields are missing or non-finite.
+      ledger.llmCalls += 1
+      if (isFiniteNumber(event.tokensIn)) ledger.tokensIn += event.tokensIn
+      if (isFiniteNumber(event.tokensOut)) ledger.tokensOut += event.tokensOut
+      if (isFiniteNumber(event.costUsd)) ledger.costUsd += event.costUsd
+    },
+    cost: snapshotCost,
+    complete(input) {
+      // `input.status` is typed `Exclude<RuntimeRunStatus, 'running'>`, but
+      // a JS caller can still pass `'running'`. Validate defensively so the
+      // state machine is enforced at runtime, not just at compile time.
+      if ((input.status as RuntimeRunStatus) === 'running') {
+        throw new ValidationError('complete() requires a terminal status, got "running"')
+      }
+      if (status !== 'running') {
+        if (status === input.status) return
+        throw new RuntimeRunStateError(
+          `Cannot transition runtime run from "${status}" to "${input.status}"`,
+        )
+      }
+      status = input.status
+      completedAtMs = now()
+      resultSummary = input.resultSummary
+      error = input.error
+      completionMetadata = input.metadata
+      // Per-field override: each finite number replaces the ledger value, anything else is
+      // ignored. `wallMs` is honoured too, so all five `RuntimeRunCost` fields behave alike.
+      const override = input.cost
+      if (override) {
+        if (isFiniteNumber(override.tokensIn)) ledger.tokensIn = override.tokensIn
+        if (isFiniteNumber(override.tokensOut)) ledger.tokensOut = override.tokensOut
+        if (isFiniteNumber(override.costUsd)) ledger.costUsd = override.costUsd
+        if (isFiniteNumber(override.llmCalls)) ledger.llmCalls = override.llmCalls
+        if (isFiniteNumber(override.wallMs)) wallMsOverride = override.wallMs
+      }
+    },
+    toRow(metadata) {
+      return buildRow(metadata)
+    },
+    async persist(metadata) {
+      if (status === 'running') {
+        throw new RuntimeRunStateError('Cannot persist a runtime run before complete() is called')
+      }
+      if (!options.adapter) return
+      await options.adapter.upsert(buildRow(metadata))
+    },
+  }
+}
+
+// Shallow-merge two optional metadata bags; `extra` wins on key clashes, both absent -> undefined.
+function mergeMetadata(
+  base: Record<string, unknown> | undefined,
+  extra: Record<string, unknown> | undefined,
+): Record<string, unknown> | undefined {
+  return base || extra ? { ...base, ...extra } : undefined
+}
+
+function randomSuffix(): string {
+  // Up to 8 base-36 digits of a random fraction (the leading "0." is dropped). Not crypto-strong;
+  // callers that need stronger guarantees pass `options.id` explicitly.
+  return Math.random().toString(36).substring(2, 10)
+}
diff --git a/src/sanitize.ts b/src/sanitize.ts
new file mode 100644
index 0000000..6b51031
--- /dev/null
+++ b/src/sanitize.ts
@@ -0,0 +1,554 @@
+/**
+ * @stable
+ *
+ * Sanitization for runtime telemetry. The rule: nothing user-controlled leaks
+ * unless the caller opts in with a `RuntimeTelemetryOptions` flag. This is the
+ * envelope that ends up in `agent_run.metadata.runtimeEvents` on every
+ * consumer, so the default must be safe.
+ */
+
+import type {
+  ControlEvalResult,
+  ControlRunResult,
+  ControlStep,
+  DataAcquisitionPlan,
+  KnowledgeReadinessReport,
+  KnowledgeRequirement,
+  UserQuestion,
+} from '@tangle-network/agent-eval'
+
+import type {
+  AgentRuntimeEvent,
+  AgentTaskSpec,
+  AgentTaskStatus,
+  RuntimeSession,
+  RuntimeStreamEvent,
+} from './types'
+
+/** @stable */
+export interface RuntimeTelemetryOptions {
+  /**
+   * Include raw task inputs. Off by default because task inputs often contain
+   * customer facts, credentials, source text, or internal IDs.
+   */
+  includeInputs?: boolean
+  /** Include requirement descriptions and question text. Secret/credential text is always redacted. */
+  includeRequirementDescriptions?: boolean
+  /** Include evidence IDs and artifact URIs. Off by default; counts are safer for shared reports. */
+  includeEvidenceIds?: boolean
+  /** Include user answers from question preflight. Off by default: keys are kept, values masked. */
+  includeUserAnswers?: boolean
+  /** Include control-step actions/results, tool-call args/results, and final text. Off by default. */
+  includeControlPayloads?: boolean
+  /** Include task, session, artifact, and final-event metadata. Off by default (may carry IDs). */
+  includeMetadata?: boolean
+  /** Include eval detail/evidence strings. Off by default because validators may echo private input. */
+  includeEvalDetails?: boolean
+}
+
+/** @stable Telemetry-safe projection of a `KnowledgeRequirement`. */
+export interface SanitizedKnowledgeRequirement {
+  id: string
+  description?: string // only with `includeRequirementDescriptions`, never for `secret` sensitivity
+  requiredFor: string[]
+  category: KnowledgeRequirement['category']
+  acquisitionMode: KnowledgeRequirement['acquisitionMode']
+  importance: KnowledgeRequirement['importance']
+  freshness: KnowledgeRequirement['freshness']
+  sensitivity: KnowledgeRequirement['sensitivity']
+  confidenceNeeded: number
+  currentConfidence: number
+  evidenceCount: number // length of the requirement's `evidenceIds`
+  evidenceIds?: string[] // only with `includeEvidenceIds`
+  fallbackPolicy: KnowledgeRequirement['fallbackPolicy']
+}
+
+/** @stable Telemetry-safe projection of a `KnowledgeReadinessReport`. */
+export interface SanitizedKnowledgeReadinessReport {
+  taskId: string
+  readinessScore: number
+  recommendedAction: KnowledgeReadinessReport['recommendedAction']
+  severity: KnowledgeReadinessReport['severity']
+  reason: string // copied from the report unchanged
+  blockingMissingRequirements: SanitizedKnowledgeRequirement[]
+  nonBlockingGaps: SanitizedKnowledgeRequirement[]
+  evidenceCount: number // length of `report.bundle.evidenceIds`
+  evidenceIds?: string[] // only with `includeEvidenceIds`
+  missingRequirementIds: string[] // ids taken from `report.bundle.missing`
+}
+
+/** @stable */
+export function sanitizeKnowledgeReadinessReport(
+  report: KnowledgeReadinessReport,
+  options: RuntimeTelemetryOptions = {},
+): SanitizedKnowledgeReadinessReport {
+  const scrub = (requirement: KnowledgeRequirement) =>
+    sanitizeKnowledgeRequirement(requirement, options)
+  // Evidence is always counted; the ids themselves are only emitted on explicit opt-in.
+  const { evidenceIds, missing } = report.bundle
+  return {
+    taskId: report.taskId,
+    readinessScore: report.readinessScore,
+    recommendedAction: report.recommendedAction,
+    severity: report.severity,
+    reason: report.reason,
+    blockingMissingRequirements: report.blockingMissingRequirements.map(scrub),
+    nonBlockingGaps: report.nonBlockingGaps.map(scrub),
+    evidenceCount: evidenceIds.length,
+    evidenceIds: options.includeEvidenceIds ? evidenceIds : undefined,
+    missingRequirementIds: missing.map(({ id }) => id),
+  }
+}
+
+/**
+ * @stable
+ *
+ * Sanitize one non-streaming `AgentRuntimeEvent` for telemetry. The result always has `type` and
+ * a sanitized `task`; per-type fields are added below, and anything governed by a
+ * `RuntimeTelemetryOptions` flag is redacted or dropped unless that flag is set.
+ */
+export function sanitizeAgentRuntimeEvent<
+  TState,
+  TAction,
+  TActionResult,
+  TEval extends ControlEvalResult,
+>(
+  event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>,
+  options: RuntimeTelemetryOptions = {},
+): Record<string, unknown> {
+  // Shared envelope; `readiness_start` and `task_start` return it unchanged.
+  const base = { type: event.type, task: sanitizeTask(event.task, options) }
+  const scrubQuestion = (question: UserQuestion) => sanitizeQuestion(question, options)
+  switch (event.type) {
+    case 'readiness_start':
+    case 'task_start':
+      return base
+    case 'control_start':
+    case 'readiness_end':
+      return { ...base, knowledge: sanitizeKnowledgeReadinessReport(event.knowledge, options) }
+    case 'questions_start':
+      return { ...base, questions: event.questions.map(scrubQuestion) }
+    // Without `includeUserAnswers` the answer keys survive and every value is masked.
+    case 'questions_end': {
+      const { userAnswers } = event
+      return {
+        ...base,
+        questions: event.questions.map(scrubQuestion),
+        userAnswers: options.includeUserAnswers ? userAnswers : redactRecord(userAnswers),
+      }
+    }
+    case 'acquisition_start':
+      return { ...base, acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan) }
+    case 'acquisition_end':
+      return {
+        ...base,
+        acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan),
+        acquiredEvidenceCount: event.acquiredEvidenceIds.length,
+        acquiredEvidenceIds: options.includeEvidenceIds ? event.acquiredEvidenceIds : undefined,
+      }
+    case 'control_step':
+      return { ...base, step: sanitizeControlStep(event.step, options) }
+    case 'control_end':
+      return { ...base, control: sanitizeControlRun(event.control, options) }
+    default:
+      // Every remaining event type contributes only its status and reason.
+      return { ...base, status: event.status, reason: event.reason }
+  }
+}
+
+/**
+ * @stable
+ * Sanitize one streaming event. Task and session are sanitized when the event carries them;
+ * fields guarded by a `RuntimeTelemetryOptions` flag are `undefined` unless that flag is set.
+ */
+export function sanitizeRuntimeStreamEvent(
+  event: RuntimeStreamEvent,
+  options: RuntimeTelemetryOptions = {},
+): Record<string, unknown> {
+  const taskPart = 'task' in event && event.task ? { task: sanitizeTask(event.task, options) } : {}
+  const sessionPart =
+    'session' in event && event.session
+      ? { session: sanitizeRuntimeSession(event.session, options) }
+      : {}
+  const scrubQuestion = (question: UserQuestion) => sanitizeQuestion(question, options)
+
+  switch (event.type) {
+    case 'readiness_end':
+      return {
+        type: event.type,
+        ...taskPart,
+        timestamp: event.timestamp,
+        decision: event.decision,
+        knowledge: sanitizeKnowledgeReadinessReport(event.knowledge, options),
+      }
+    case 'questions_start':
+      return {
+        type: event.type,
+        ...taskPart,
+        timestamp: event.timestamp,
+        questions: event.questions.map(scrubQuestion),
+      }
+    case 'questions_end': {
+      const { userAnswers } = event
+      return {
+        type: event.type,
+        ...taskPart,
+        timestamp: event.timestamp,
+        questions: event.questions.map(scrubQuestion),
+        userAnswers: options.includeUserAnswers ? userAnswers : redactRecord(userAnswers),
+      }
+    }
+    case 'acquisition_start':
+      return {
+        type: event.type,
+        ...taskPart,
+        timestamp: event.timestamp,
+        acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan),
+      }
+    case 'acquisition_end':
+      return {
+        type: event.type,
+        ...taskPart,
+        timestamp: event.timestamp,
+        acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan),
+        acquiredEvidenceCount: event.acquiredEvidenceIds.length,
+        acquiredEvidenceIds: options.includeEvidenceIds ? event.acquiredEvidenceIds : undefined,
+      }
+    case 'tool_call':
+      return {
+        type: event.type,
+        ...taskPart,
+        ...sessionPart,
+        timestamp: event.timestamp,
+        toolName: event.toolName,
+        toolCallId: event.toolCallId,
+        args: options.includeControlPayloads ? event.args : undefined,
+      }
+    case 'tool_result':
+      return {
+        type: event.type,
+        ...taskPart,
+        ...sessionPart,
+        timestamp: event.timestamp,
+        toolName: event.toolName,
+        toolCallId: event.toolCallId,
+        result: options.includeControlPayloads ? event.result : undefined,
+      }
+    case 'llm_call':
+      return {
+        type: event.type,
+        ...taskPart,
+        ...sessionPart,
+        timestamp: event.timestamp,
+        model: event.model,
+        tokensIn: event.tokensIn,
+        tokensOut: event.tokensOut,
+        costUsd: event.costUsd,
+        latencyMs: event.latencyMs,
+        finishReason: event.finishReason,
+      }
+    case 'artifact':
+      return {
+        type: event.type,
+        ...taskPart,
+        ...sessionPart,
+        timestamp: event.timestamp,
+        artifactId: event.artifactId,
+        name: event.name,
+        mimeType: event.mimeType,
+        uri: options.includeEvidenceIds ? event.uri : undefined,
+        metadata: options.includeMetadata ? event.metadata : undefined,
+      }
+    case 'final':
+      return {
+        type: event.type,
+        ...taskPart,
+        ...sessionPart,
+        timestamp: event.timestamp,
+        status: event.status,
+        reason: event.reason,
+        text: options.includeControlPayloads ? event.text : undefined,
+        metadata: options.includeMetadata ? event.metadata : undefined,
+      }
+    default:
+      return {
+        type: event.type,
+        ...taskPart,
+        ...sessionPart,
+        timestamp: 'timestamp' in event ? event.timestamp : undefined,
+        ...pickPublicStreamFields(event),
+      }
+  }
+}
+
+function sanitizeTask(
+  task: AgentTaskSpec,
+  options: RuntimeTelemetryOptions,
+): Record<string, unknown> {
+  // Withheld-but-present values collapse to a '[redacted]' marker; absent values stay undefined.
+  const gate = (allowed: boolean | undefined, value: unknown) =>
+    allowed ? value : value ? '[redacted]' : undefined
+  const requiredKnowledge = task.requiredKnowledge?.map((requirement) =>
+    sanitizeKnowledgeRequirement(requirement, options),
+  )
+  const { id, intent, domain } = task
+  const inputs = gate(options.includeInputs, task.inputs)
+  const metadata = gate(options.includeMetadata, task.metadata)
+  return { id, intent, domain, inputs, requiredKnowledge, metadata }
+}
+
+// Telemetry view of a session. The resume token itself is never copied; only a presence flag
+// is reported.
+function sanitizeRuntimeSession(
+  session: RuntimeSession,
+  options: RuntimeTelemetryOptions,
+): Record<string, unknown> {
+  // Withheld metadata still leaves a marker so readers can tell that some was present.
+  const marker = session.metadata ? '[redacted]' : undefined
+  return {
+    id: session.id,
+    backend: session.backend,
+    status: session.status,
+    hasResumeToken: Boolean(session.resumeToken),
+    createdAt: session.createdAt,
+    updatedAt: session.updatedAt,
+    metadata: options.includeMetadata ? session.metadata : marker,
+  }
+}
+
+function sanitizeKnowledgeRequirement(
+  requirement: KnowledgeRequirement,
+  options: RuntimeTelemetryOptions,
+): SanitizedKnowledgeRequirement {
+  const { description, evidenceIds, sensitivity } = requirement
+  const showDescription = options.includeRequirementDescriptions && sensitivity !== 'secret'
+  return {
+    id: requirement.id,
+    description: showDescription ? description : undefined, // never for 'secret' requirements
+    requiredFor: requirement.requiredFor,
+    category: requirement.category,
+    acquisitionMode: requirement.acquisitionMode,
+    importance: requirement.importance,
+    freshness: requirement.freshness,
+    sensitivity,
+    confidenceNeeded: requirement.confidenceNeeded,
+    currentConfidence: requirement.currentConfidence,
+    evidenceCount: evidenceIds.length,
+    evidenceIds: options.includeEvidenceIds ? evidenceIds : undefined,
+    fallbackPolicy: requirement.fallbackPolicy,
+  }
+}
+
+function sanitizeQuestion(
+  question: UserQuestion,
+  options: RuntimeTelemetryOptions,
+): Record<string, unknown> {
+  const verbose = options.includeRequirementDescriptions
+  // Credential prompts never expose their question text, even when descriptions are enabled.
+  const showQuestion = verbose && question.answerType !== 'credential'
+  return {
+    id: question.id,
+    question: showQuestion ? question.question : undefined,
+    reason: verbose ? question.reason : undefined,
+    requirementId: question.requirementId,
+    importance: question.importance,
+    answerType: question.answerType,
+    impactIfUnknown: verbose ? question.impactIfUnknown : undefined,
+    optionCount: question.options?.length ?? 0,
+  }
+}
+
+// Reduce an acquisition plan to its id, requirement ids, mode and priority plus two counts.
+// The expected evidence ids and the question objects themselves are not copied into the
+// result.
+function sanitizeAcquisitionPlan(plan: DataAcquisitionPlan): Record<string, unknown> {
+  const { id, requirementIds, mode, priority, expectedEvidenceIds, questions } = plan
+  // A missing list counts as zero.
+  const expectedEvidenceCount = expectedEvidenceIds?.length ?? 0
+  const questionCount = questions?.length ?? 0
+  return { id, requirementIds, mode, priority, expectedEvidenceCount, questionCount }
+}
+
+function sanitizeControlStep<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
+  step: ControlStep<TState, TAction, TActionResult, TEval>,
+  options: RuntimeTelemetryOptions,
+): Record<string, unknown> {
+  // Action payloads and results are opt-in; the ok flag, error, timing and eval summaries are
+  // always reported.
+  const payloads = options.includeControlPayloads
+  const { decision, actionOutcome: outcome } = step
+  return {
+    index: step.index,
+    decisionType: decision.type,
+    reason: decision.reason,
+    action: payloads && decision.type === 'continue' ? decision.action : undefined,
+    result: payloads && outcome?.ok ? outcome.result : undefined,
+    actionOk: outcome?.ok,
+    actionError: outcome?.ok === false ? outcome.error : undefined,
+    durationMs: outcome?.durationMs,
+    evalsBefore: summarizeEvals(step.evalsBefore, options),
+    evalsAfter: summarizeEvals(step.evalsAfter, options),
+    startedAt: step.startedAt,
+    endedAt: step.endedAt,
+  }
+}
+
+function sanitizeControlRun<TState, TAction, TActionResult, TEval extends ControlEvalResult>(
+  control: ControlRunResult<TState, TAction, TActionResult, TEval>,
+  options: RuntimeTelemetryOptions,
+): Record<string, unknown> {
+  // Steps and runtime errors are reported as counts only; their contents are not copied.
+  const { pass, completed, reason, score, wallMs, spentCostUsd } = control
+  const { failureClass, stoppedBy, runId } = control
+  const outcome = { pass, completed, reason, score, stepCount: control.steps.length }
+  const spend = { wallMs, spentCostUsd, failureClass, stoppedBy, runId }
+  // Spread order fixes the key order: outcome fields, then spend/stop fields, then the error
+  // count and the summarized final evals.
+  return {
+    ...outcome,
+    ...spend,
+    runtimeErrorCount: control.runtimeErrors.length,
+    finalEvals: summarizeEvals(control.finalEvals, options),
+  }
+}
+
+function summarizeEvals(
+  evals: ControlEvalResult[],
+  options: RuntimeTelemetryOptions,
+): Array<Record<string, unknown>> {
+  // Identity and scoring fields are always kept. The free-text `detail` and `evidence` fields
+  // are present in every entry but stay undefined unless `includeEvalDetails` is set.
+  return evals.map((entry) => {
+    const { id, passed, score, severity, objective } = entry
+    const detail = options.includeEvalDetails ? entry.detail : undefined
+    const evidence = options.includeEvalDetails ? entry.evidence : undefined
+    return { id, passed, score, severity, objective, detail, evidence }
+  })
+}
+
+function redactRecord(record: Record<string, string>): Record<string, string> {
+  return Object.fromEntries(Object.entries(record).map(([key]) => [key, '[redacted]'])) // keys kept
+}
+
+function pickPublicStreamFields(event: RuntimeStreamEvent): Record<string, unknown> {
+  // Allow-list of extra fields per event type; note that delta text passes through unredacted.
+  if (event.type === 'text_delta' || event.type === 'reasoning_delta') return { text: event.text }
+  if (event.type === 'task_end') return { status: event.status, reason: event.reason }
+  if (event.type === 'backend_error') {
+    return { backend: event.backend, message: event.message, recoverable: event.recoverable }
+  }
+  if (event.type === 'backend_start' || event.type === 'backend_end')
+    return { backend: event.backend }
+  return {}
+}
+
+/** @stable Sink plus the array of sanitized events it fills. */
+export interface RuntimeEventCollector<
+  TState = unknown,
+  TAction = unknown,
+  TActionResult = unknown,
+  TEval extends ControlEvalResult = ControlEvalResult,
+> {
+  onEvent: (event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>) => void
+  events: Array<Record<string, unknown>> // sanitized copies, in arrival order
+}
+
+/** @stable Callback that receives one `RuntimeStreamEvent`; it returns nothing. */
+export type RuntimeStreamEventSink = (event: RuntimeStreamEvent) => void
+
+/** @stable */
+export interface RuntimeStreamEventSummary {
+  /** Total count of sanitized events collected. */
+  eventCount: number
+  /** Count of events per `type`. Useful for log-line summaries. */
+  eventCountsByType: Record<string, number>
+  /** First session id observed in a `session_created` / `session_resumed` event, if any. */
+  firstSessionId?: string
+  /** Last `final` event's status, if a final event was observed. */
+  finalStatus?: AgentTaskStatus
+  /** Last `final` event's reason, if a final event was observed. */
+  finalReason?: string
+  /** Concatenated `text_delta.text` (reasoning deltas excluded), even when payloads are redacted. */
+  finalText: string
+}
+
+/** @stable */
+export interface RuntimeStreamEventCollector {
+  onEvent: RuntimeStreamEventSink // sanitizes and records the event, then updates summary state
+  events: Array<Record<string, unknown>> // sanitized copies, in arrival order
+  /** Snapshot of a small streaming-flavored summary derived from collected events. */
+  summary(): RuntimeStreamEventSummary
+}
+
+/**
+ * @stable Build a collector whose `onEvent` appends a sanitized copy of each event to `events`.
+ */
+export function createRuntimeEventCollector<
+  TState = unknown,
+  TAction = unknown,
+  TActionResult = unknown,
+  TEval extends ControlEvalResult = ControlEvalResult,
+>(
+  options: RuntimeTelemetryOptions = {},
+): RuntimeEventCollector<TState, TAction, TActionResult, TEval> {
+  const events: Array<Record<string, unknown>> = []
+  const onEvent = (event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>): void => {
+    events.push(sanitizeAgentRuntimeEvent(event, options))
+  }
+  return { events, onEvent }
+}
+
+/**
+ * @stable
+ *
+ * Streaming-event counterpart of `createRuntimeEventCollector`. Feed every
+ * event yielded by `runAgentTaskStream` to `onEvent`, then read the sanitized
+ * copies from `events` and the running totals from `summary()`. The same
+ * `RuntimeTelemetryOptions` redaction flags apply.
+ *
+ * Stream and non-stream events have different field shapes (timestamps,
+ * sessions, text/tool deltas), so this is a sibling factory rather than an
+ * overload of `createRuntimeEventCollector`; the unified-union alternative
+ * was rejected because dispatching on `type` alone would silently misroute
+ * events whose `type` literals overlap (`task_start`, `readiness_end`, etc.).
+ */
+export function createRuntimeStreamEventCollector(
+  options: RuntimeTelemetryOptions = {},
+): RuntimeStreamEventCollector {
+  const events: Array<Record<string, unknown>> = []
+  const eventCountsByType: Record<string, number> = {}
+  let firstSessionId: string | undefined
+  let finalStatus: AgentTaskStatus | undefined
+  let finalReason: string | undefined
+  let finalText = ''
+  return {
+    events,
+    onEvent: (event) => {
+      events.push(sanitizeRuntimeStreamEvent(event, options))
+      eventCountsByType[event.type] = (eventCountsByType[event.type] ?? 0) + 1
+      switch (event.type) {
+        case 'text_delta':
+          finalText += event.text
+          break
+        case 'session_created':
+        case 'session_resumed':
+          if (!firstSessionId) firstSessionId = event.session.id
+          break
+        case 'final':
+          finalStatus = event.status
+          finalReason = event.reason
+          break
+      }
+    },
+    summary: () => ({
+      eventCount: events.length,
+      eventCountsByType: { ...eventCountsByType },
+      firstSessionId,
+      finalStatus,
+      finalReason,
+      finalText,
+    }),
+  }
+}
diff --git a/src/sessions.ts b/src/sessions.ts
new file mode 100644
index 0000000..403e3b6
--- /dev/null
+++ b/src/sessions.ts
@@ -0,0 +1,61 @@
+/**
+ * @stable
+ *
+ * Session helpers + an in-memory `RuntimeSessionStore` implementation suitable
+ * for tests, scratch processes, and per-request scratch storage in serverless
+ * runtimes. Durable stores (D1, postgres, Durable Objects) implement the same
+ * interface from `./types`.
+ */
+
+import type { RuntimeSession, RuntimeSessionStore, RuntimeStreamEvent } from './types'
+
+/**
+ * @internal
+ * Create an `active` session record for `backend`. `createdAt` and `updatedAt` start out
+ * identical, and `metadata` is attached as given.
+ */
+export function newRuntimeSession(
+  backend: string,
+  requestedId?: string,
+  metadata?: Record<string, unknown>,
+): RuntimeSession {
+  const createdAt = nowIso()
+  // A missing or empty `requestedId` falls back to a freshly generated UUID, so callers
+  // can pass an empty string to request a new identifier.
+  const id = requestedId || crypto.randomUUID()
+  return { id, backend, status: 'active', createdAt, updatedAt: createdAt, metadata }
+}
+
+/** @internal Return a shallow copy of `session` with `updatedAt` set to the current time. */
+export function touchSession(session: RuntimeSession): RuntimeSession {
+  return { ...session, updatedAt: nowIso() }
+}
+
+/** @internal Current time as an ISO-8601 string, as produced by `Date.prototype.toISOString`. */
+export function nowIso(): string {
+  return new Date().toISOString()
+}
+
+/** @stable Map-backed store: sessions are kept by reference and event logs are per session id. */
+export class InMemoryRuntimeSessionStore implements RuntimeSessionStore {
+  private readonly sessions = new Map<string, RuntimeSession>()
+  private readonly events = new Map<string, RuntimeStreamEvent[]>()
+
+  get(sessionId: string): RuntimeSession | undefined {
+    return this.sessions.get(sessionId) // undefined for an unknown id
+  }
+
+  put(session: RuntimeSession): void {
+    this.sessions.set(session.id, session) // replaces any session stored under the same id
+  }
+
+  appendEvent(sessionId: string, event: RuntimeStreamEvent): void {
+    const log = this.events.get(sessionId)
+    if (log) log.push(event)
+    else this.events.set(sessionId, [event])
+  }
+
+  listEvents(sessionId: string): RuntimeStreamEvent[] {
+    return (this.events.get(sessionId) ?? []).slice() // copy, so callers cannot mutate the log
+  }
+}
diff --git a/src/sse.ts b/src/sse.ts
new file mode 100644
index 0000000..98276f3
--- /dev/null
+++ b/src/sse.ts
@@ -0,0 +1,70 @@
+/**
+ * @stable
+ *
+ * Server-Sent Events serialization for runtime telemetry streams.
+ *
+ * Newline-safe by construction: any newline in `id` or `event` is collapsed to
+ * a space (browsers terminate fields on newline), and multi-line `data`
+ * payloads are split into one `data:` line per source line so JSON.stringify
+ * output transports cleanly.
+ */
+
+import type { KnowledgeReadinessReport } from '@tangle-network/agent-eval'
+import type { RuntimeTelemetryOptions } from './sanitize'
+import { sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent } from './sanitize'
+import type { RuntimeStreamEvent } from './types'
+
+/** Optional SSE framing fields accepted by `encodeServerSentEvent`; falsy `event`/`id` values are omitted. @stable */
+export interface ServerSentEventOptions {
+  event?: string // written as the `event:` field; CR/LF characters are replaced with spaces
+  id?: string // written as the `id:` field; CR/LF characters are replaced with spaces
+  retry?: number // written as `retry:` (floored) only when it is a finite number >= 0
+}
+
+/** Serialize `data` (strings verbatim, anything else as JSON) plus optional framing fields into one SSE frame. @stable */
+export function encodeServerSentEvent(data: unknown, options: ServerSentEventOptions = {}): string {
+  const lines: string[] = []
+  if (options.id) lines.push(`id: ${stripNewlines(options.id)}`)
+  if (options.event) lines.push(`event: ${stripNewlines(options.event)}`)
+  if (typeof options.retry === 'number' && Number.isFinite(options.retry) && options.retry >= 0) {
+    lines.push(`retry: ${Math.floor(options.retry)}`)
+  }
+  // JSON.stringify returns undefined for undefined/functions/symbols; emit `null` instead of crashing.
+  const payload = typeof data === 'string' ? data : JSON.stringify(data) ?? 'null'
+  for (const line of payload.split(/\r\n|\r|\n/)) {
+    lines.push(`data: ${line}`) // SSE parsers end a line at CRLF, LF, or a bare CR, so split on all three
+  }
+  return `${lines.join('\n')}\n\n`
+}
+
+/** Sanitize `report` and encode it as a `{ type: 'readiness', readiness }` SSE frame. @stable */
+export function readinessServerSentEvent(
+  report: KnowledgeReadinessReport,
+  options: RuntimeTelemetryOptions & ServerSentEventOptions = {},
+): string {
+  // Peel off the SSE framing fields; whatever is left configures the sanitizer.
+  const { event, id, retry, ...sanitizerOptions } = options
+  const framing: ServerSentEventOptions = { event, id, retry }
+  const body = {
+    type: 'readiness',
+    readiness: sanitizeKnowledgeReadinessReport(report, sanitizerOptions),
+  }
+  return encodeServerSentEvent(body, framing)
+}
+
+/** Sanitize a runtime stream event and encode the sanitized copy as one SSE frame. @stable */
+export function runtimeStreamServerSentEvent(
+  event: RuntimeStreamEvent,
+  options: RuntimeTelemetryOptions & ServerSentEventOptions = {},
+): string {
+  // `event` already names the payload parameter, so the SSE event name is aliased to `eventName`.
+  const { event: eventName, id, retry, ...sanitizerOptions } = options
+  const framing: ServerSentEventOptions = { event: eventName, id, retry }
+  const sanitized = sanitizeRuntimeStreamEvent(event, sanitizerOptions)
+  // Only the sanitized copy is serialized.
+  return encodeServerSentEvent(sanitized, framing)
+}
+
+function stripNewlines(value: string): string {
+  return value.split(/[\r\n]/).join(' ') // each CR or LF character becomes exactly one space
+}
diff --git a/src/trace-bridge.ts b/src/trace-bridge.ts
new file mode 100644
index 0000000..77ec758
--- /dev/null
+++ b/src/trace-bridge.ts
@@ -0,0 +1,261 @@
+/**
+ * @stable
+ *
+ * Bridge from runtime stream events to the agent-eval trace schema.
+ *
+ * Before this module, consumers (legal-agent's chat.ts, gtm-agent's runtime
+ * route) hand-rolled an adapter from `RuntimeStreamEvent` -> `TraceEvent` per
+ * repo. The mapping is mechanical and the destination schema is owned by
+ * agent-eval, so the adapter belongs in runtime, not in N consumer repos.
+ *
+ * The bridge is intentionally one-way (runtime -> agent-eval). The reverse
+ * mapping is degenerate (agent-eval events have no session / task affinity)
+ * and would invite consumers to round-trip through agent-eval, defeating the
+ * point of the runtime-specific shape.
+ */
+
+import type { EventKind, TraceEvent } from '@tangle-network/agent-eval'
+
+import { ValidationError } from './errors'
+import type { RuntimeStreamEvent } from './types'
+
+/** Configuration accepted by `createTraceBridge` and `toAgentEvalTrace`. @stable */
+export interface TraceBridgeOptions {
+  /**
+   * Stable `runId` stamped on every emitted `TraceEvent`. Required: agent-eval's
+   * `TraceEvent.runId` is non-optional, and a falsy value throws `ValidationError`.
+   */
+  runId: string
+  /**
+   * Optional `spanId` copied onto every emitted event (for example, an outer
+   * runtime-task span the consumer is already emitting).
+   */
+  spanId?: string
+  /**
+   * Optional id generator; default = monotonic counter scoped to this bridge
+   * instance (`evt-1`, `evt-2`, ...). Override for deterministic tests or to
+   * integrate with a wider id-allocator (uuid, ksuid).
+   */
+  newEventId?: () => string
+}
+
+/** Converter object returned by `createTraceBridge`. @stable */
+export interface TraceBridge {
+  /**
+   * Map a single `RuntimeStreamEvent` to a `TraceEvent`. Returns `undefined`
+   * for events that have no useful trace projection (text deltas, reasoning
+   * deltas — these belong inside an `LlmSpan.output`, not as separate trace
+   * events).
+   */
+  toTraceEvent(event: RuntimeStreamEvent): TraceEvent | undefined
+  /** Convenience: map every event in iteration order, dropping those with no trace projection. */
+  drain(events: Iterable<RuntimeStreamEvent>): TraceEvent[]
+}
+
+/**
+ * @stable
+ *
+ * Build a stateful bridge. State is intentionally minimal — only the event-id
+ * counter — because the runtime stream already carries timestamps and the
+ * caller already knows the `runId`.
+ */
+export function createTraceBridge(options: TraceBridgeOptions): TraceBridge {
+  if (!options.runId) {
+    throw new ValidationError('createTraceBridge: runId is required')
+  }
+  // Fallback ids are `evt-1`, `evt-2`, ... counted separately for each bridge.
+  let issued = 0
+  const nextEventId = options.newEventId ?? (() => `evt-${++issued}`)
+  const spanId = options.spanId
+  // Stamp one runtime event; `undefined` means it has no trace projection.
+  function toTraceEvent(event: RuntimeStreamEvent): TraceEvent | undefined {
+    const projected = projectToTraceEvent(event)
+    if (projected === undefined) return undefined
+    const eventId = nextEventId()
+    const timestamp = timestampFor(event)
+    const { kind, payload } = projected
+    return { eventId, runId: options.runId, spanId, kind, timestamp, payload }
+  }
+
+  // Map events in iteration order, keeping only those that produced a trace event.
+  function drain(events: Iterable<RuntimeStreamEvent>): TraceEvent[] {
+    const collected: TraceEvent[] = []
+    for (const item of events) {
+      const mapped = toTraceEvent(item)
+      if (mapped !== undefined) collected.push(mapped)
+    }
+    return collected
+  }
+
+  return {
+    toTraceEvent,
+    drain,
+  }
+}
+
+/**
+ * @stable
+ *
+ * One-shot mapping through a throwaway bridge built from `options`. The default id counter is
+ * per bridge, so without `options.newEventId` every call stamps `eventId: 'evt-1'`.
+ */
+export function toAgentEvalTrace(
+  event: RuntimeStreamEvent,
+  options: TraceBridgeOptions,
+): TraceEvent | undefined {
+  const bridge = createTraceBridge(options)
+  return bridge.toTraceEvent(event)
+}
+
+interface TraceProjection {
+  kind: EventKind // agent-eval event kind chosen for the runtime event
+  payload: Record<string, unknown> // event-specific fields that become the trace event's payload
+}
+
+function projectToTraceEvent(event: RuntimeStreamEvent): TraceProjection | undefined {
+  switch (event.type) {
+    case 'task_start':
+      return {
+        kind: 'log',
+        payload: { phase: 'task_start', taskId: event.task.id, intent: event.task.intent },
+      }
+    case 'readiness_start':
+      return { kind: 'log', payload: { phase: 'readiness_start', taskId: event.task.id } }
+    case 'readiness_end':
+      return {
+        kind: event.decision.passed ? 'log' : 'policy_violation', // a failed readiness decision is a policy violation
+        payload: {
+          phase: 'readiness_end',
+          taskId: event.task.id,
+          status: event.decision.status,
+          readinessScore: event.decision.readinessScore,
+          blockingGapIds: event.decision.blockingGapIds,
+          nonBlockingGapIds: event.decision.nonBlockingGapIds,
+          reason: event.decision.reason,
+        },
+      }
+    case 'questions_start':
+      return {
+        kind: 'log',
+        payload: { phase: 'questions_start', questionCount: event.questions.length },
+      }
+    case 'questions_end':
+      return {
+        kind: 'log',
+        payload: {
+          phase: 'questions_end',
+          questionCount: event.questions.length,
+          answerCount: Object.keys(event.userAnswers).length,
+        },
+      }
+    case 'acquisition_start':
+      return {
+        kind: 'log',
+        payload: { phase: 'acquisition_start', planCount: event.acquisitionPlans.length },
+      }
+    case 'acquisition_end':
+      return {
+        kind: 'log',
+        payload: {
+          phase: 'acquisition_end',
+          planCount: event.acquisitionPlans.length,
+          evidenceCount: event.acquiredEvidenceIds.length,
+        },
+      }
+    case 'session_created':
+    case 'session_resumed':
+      return {
+        kind: 'log',
+        payload: {
+          phase: event.type, // 'session_created' or 'session_resumed'
+          sessionId: event.session.id,
+          backend: event.session.backend,
+        },
+      }
+    case 'backend_start':
+    case 'backend_end':
+      return { kind: 'log', payload: { phase: event.type, backend: event.backend } }
+    case 'backend_error':
+      return {
+        kind: 'error',
+        payload: {
+          backend: event.backend, // note: unlike the other cases, this payload carries no `phase` field
+          message: event.message,
+          recoverable: event.recoverable,
+        },
+      }
+    case 'tool_call':
+      return {
+        kind: 'log',
+        payload: {
+          phase: 'tool_call',
+          toolName: event.toolName,
+          toolCallId: event.toolCallId,
+          // Args intentionally omitted at this layer; consumers attach the
+          // payload to a `ToolSpan` if they need to retain it. Trace events
+          // are point-in-time markers, not the canonical store for tool I/O.
+        },
+      }
+    case 'tool_result':
+      return {
+        kind: 'log',
+        payload: {
+          phase: 'tool_result',
+          toolName: event.toolName,
+          toolCallId: event.toolCallId, // `result` is not copied, mirroring the omitted `args` on tool_call
+        },
+      }
+    case 'llm_call':
+      return {
+        kind: 'log',
+        payload: {
+          phase: 'llm_call',
+          model: event.model,
+          tokensIn: event.tokensIn,
+          tokensOut: event.tokensOut,
+          costUsd: event.costUsd,
+          latencyMs: event.latencyMs,
+          finishReason: event.finishReason,
+        },
+      }
+    case 'artifact':
+      return {
+        kind: 'state_mutation', // the only case in this switch projected as a state mutation
+        payload: {
+          phase: 'artifact',
+          artifactId: event.artifactId,
+          name: event.name,
+          mimeType: event.mimeType, // `uri` and `metadata` are not copied
+        },
+      }
+    case 'task_end':
+      return {
+        kind: event.status === 'failed' || event.status === 'aborted' ? 'error' : 'log', // 'completed'/'blocked' stay logs
+        payload: { phase: 'task_end', status: event.status, reason: event.reason },
+      }
+    case 'final':
+      return {
+        kind: event.status === 'failed' || event.status === 'aborted' ? 'error' : 'log', // 'completed'/'blocked' stay logs
+        payload: { phase: 'final', status: event.status, reason: event.reason }, // `text` and `metadata` are not copied
+      }
+    case 'text_delta':
+    case 'reasoning_delta':
+      // Token-level deltas don't map cleanly to `TraceEvent`. Consumers that
+      // want the final text should accumulate it into an `LlmSpan.output` or
+      // a `final` event, both of which the bridge does cover.
+      return undefined
+    default: {
+      // Exhaustiveness fallback; future event types should add a case above.
+      const exhaust: never = event
+      void exhaust
+      return undefined
+    }
+  }
+}
+
+function timestampFor(event: RuntimeStreamEvent): number {
+  // Use the event's own timestamp when it parses; otherwise stamp with the current time.
+  const raw = 'timestamp' in event ? event.timestamp : undefined
+  const millis = raw ? Date.parse(raw) : Number.NaN
+  return Number.isNaN(millis) ? Date.now() : millis
+}
diff --git a/src/types.ts b/src/types.ts
new file mode 100644
index 0000000..e90e55b
--- /dev/null
+++ b/src/types.ts
@@ -0,0 +1,462 @@
+/**
+ * @stable
+ *
+ * Core task, session, adapter, and stream-event types for the runtime.
+ *
+ * This module owns the public shape of every cross-cutting record (`AgentTaskSpec`,
+ * `RuntimeSession`, `RuntimeStreamEvent`). Everything else in the runtime
+ * imports from here so type-level changes ripple in one place.
+ */
+
+import type {
+  ControlBudget,
+  ControlDecision,
+  ControlEvalResult,
+  ControlRunResult,
+  ControlStep,
+  DataAcquisitionPlan,
+  KnowledgeReadinessReport,
+  KnowledgeRequirement,
+  RunRecord,
+  TraceStore,
+  UserQuestion,
+} from '@tangle-network/agent-eval'
+
+/** One unit of agent work: required `id` and `intent`, plus optional domain, inputs, knowledge needs, budget, and metadata. @stable */
+export interface AgentTaskSpec {
+  id: string
+  intent: string
+  /** Domain is metadata, not an architectural boundary: tax, legal, gtm, creative, blueprint, redteam, etc. */
+  domain?: string
+  inputs?: Record<string, unknown>
+  requiredKnowledge?: KnowledgeRequirement[]
+  budget?: Partial<ControlBudget>
+  metadata?: Record<string, unknown>
+}
+
+/** Optional hooks for building/refreshing readiness, answering questions, and executing acquisition plans; each may be sync or async. @stable */
+export interface AgentKnowledgeProvider {
+  buildReadiness?(task: AgentTaskSpec): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport
+  answerQuestions?(
+    questions: UserQuestion[],
+    task: AgentTaskSpec,
+  ): Promise<Record<string, string>> | Record<string, string>
+  executeAcquisitionPlans?(
+    plans: DataAcquisitionPlan[],
+    task: AgentTaskSpec,
+  ): Promise<string[]> | string[]
+  refreshReadiness?(input: {
+    task: AgentTaskSpec
+    previous: KnowledgeReadinessReport
+    userAnswers: Record<string, string>
+    acquiredEvidenceIds: string[]
+  }): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport
+}
+
+/** Per-step context passed to an `AgentAdapter`'s `decide`, `act`, and `shouldStop` hooks. @stable */
+export interface AgentTaskContext<
+  TState,
+  TAction,
+  TActionResult,
+  TEval extends ControlEvalResult = ControlEvalResult,
+> {
+  task: AgentTaskSpec
+  knowledge: KnowledgeReadinessReport
+  state: TState
+  evals: TEval[]
+  history: ControlStep<TState, TAction, TActionResult, TEval>[]
+  budget: ControlBudget
+  stepIndex: number
+  wallMs: number
+  spentCostUsd: number
+  remainingCostUsd?: number
+  abortSignal: AbortSignal
+}
+
+/** Adapter contract: required `observe`/`validate`/`decide`/`act`, plus optional stop, knowledge-blocked, cost, and run-record hooks. @stable */
+export interface AgentAdapter<
+  TState,
+  TAction,
+  TActionResult,
+  TEval extends ControlEvalResult = ControlEvalResult,
+> {
+  observe(ctx: {
+    task: AgentTaskSpec
+    knowledge: KnowledgeReadinessReport
+    history: ControlStep<TState, TAction, TActionResult, TEval>[]
+    abortSignal: AbortSignal
+  }): Promise<TState> | TState
+
+  validate(ctx: {
+    task: AgentTaskSpec
+    knowledge: KnowledgeReadinessReport
+    state: TState
+    history: ControlStep<TState, TAction, TActionResult, TEval>[]
+    abortSignal: AbortSignal
+  }): Promise<TEval[]> | TEval[]
+
+  decide(
+    ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>,
+  ): Promise<ControlDecision<TAction>> | ControlDecision<TAction>
+
+  act(
+    action: TAction,
+    ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>,
+  ): Promise<TActionResult> | TActionResult
+
+  shouldStop?(ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>):
+    | Promise<{
+        stop: boolean
+        pass: boolean
+        reason: string
+        score?: number
+      }>
+    | {
+        stop: boolean
+        pass: boolean
+        reason: string
+        score?: number
+      }
+
+  onKnowledgeBlocked?(ctx: {
+    task: AgentTaskSpec
+    knowledge: KnowledgeReadinessReport
+    questions: UserQuestion[]
+    acquisitionPlans: DataAcquisitionPlan[]
+  }): Promise<ControlDecision<TAction>> | ControlDecision<TAction>
+
+  getActionCostUsd?(ctx: {
+    action: TAction
+    result: TActionResult
+    task: AgentTaskSpec
+    state: TState
+    evals: TEval[]
+    history: ControlStep<TState, TAction, TActionResult, TEval>[]
+  }): number | undefined // synchronous only, unlike the other hooks
+
+  projectRunRecords?(
+    result: ControlRunResult<TState, TAction, TActionResult, TEval>,
+    task: AgentTaskSpec,
+  ): RunRecord[]
+}
+
+/** Outcome value carried by `task_end` and `final` events and by task run results. @stable */
+export type AgentTaskStatus = 'completed' | 'blocked' | 'failed' | 'aborted'
+
+/** Lifecycle events delivered to an `AgentRuntimeEventSink`, discriminated by `type`; unlike `RuntimeStreamEvent` they carry no `timestamp`. @stable */
+export type AgentRuntimeEvent<
+  TState = unknown,
+  TAction = unknown,
+  TActionResult = unknown,
+  TEval extends ControlEvalResult = ControlEvalResult,
+> =
+  | { type: 'task_start'; task: AgentTaskSpec }
+  | { type: 'readiness_start'; task: AgentTaskSpec }
+  | { type: 'readiness_end'; task: AgentTaskSpec; knowledge: KnowledgeReadinessReport }
+  | { type: 'questions_start'; task: AgentTaskSpec; questions: UserQuestion[] }
+  | {
+      type: 'questions_end'
+      task: AgentTaskSpec
+      questions: UserQuestion[]
+      userAnswers: Record<string, string>
+    }
+  | {
+      type: 'acquisition_start'
+      task: AgentTaskSpec
+      acquisitionPlans: DataAcquisitionPlan[]
+    }
+  | {
+      type: 'acquisition_end'
+      task: AgentTaskSpec
+      acquisitionPlans: DataAcquisitionPlan[]
+      acquiredEvidenceIds: string[]
+    }
+  | { type: 'control_start'; task: AgentTaskSpec; knowledge: KnowledgeReadinessReport }
+  | {
+      type: 'control_step'
+      task: AgentTaskSpec
+      step: ControlStep<TState, TAction, TActionResult, TEval>
+    }
+  | {
+      type: 'control_end'
+      task: AgentTaskSpec
+      control: ControlRunResult<TState, TAction, TActionResult, TEval>
+    }
+  | { type: 'task_end'; task: AgentTaskSpec; status: AgentTaskStatus; reason: string }
+
+/** Callback that receives each `AgentRuntimeEvent`; it may return a promise or nothing. @stable */
+export type AgentRuntimeEventSink<
+  TState = unknown,
+  TAction = unknown,
+  TActionResult = unknown,
+  TEval extends ControlEvalResult = ControlEvalResult,
+> = (event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>) => Promise<void> | void
+
+/** Stream events discriminated by `type`; delta, tool, `llm_call`, and `artifact` events make `task`, `session`, and `timestamp` optional. @stable */
+export type RuntimeStreamEvent =
+  | { type: 'task_start'; task: AgentTaskSpec; timestamp: string }
+  | { type: 'readiness_start'; task: AgentTaskSpec; timestamp: string }
+  | {
+      type: 'readiness_end'
+      task: AgentTaskSpec
+      knowledge: KnowledgeReadinessReport
+      decision: KnowledgeReadinessDecision
+      timestamp: string
+    }
+  | {
+      type: 'questions_start'
+      task: AgentTaskSpec
+      questions: UserQuestion[]
+      timestamp: string
+    }
+  | {
+      type: 'questions_end'
+      task: AgentTaskSpec
+      questions: UserQuestion[]
+      userAnswers: Record<string, string>
+      timestamp: string
+    }
+  | {
+      type: 'acquisition_start'
+      task: AgentTaskSpec
+      acquisitionPlans: DataAcquisitionPlan[]
+      timestamp: string
+    }
+  | {
+      type: 'acquisition_end'
+      task: AgentTaskSpec
+      acquisitionPlans: DataAcquisitionPlan[]
+      acquiredEvidenceIds: string[]
+      timestamp: string
+    }
+  | { type: 'session_created'; task: AgentTaskSpec; session: RuntimeSession; timestamp: string }
+  | { type: 'session_resumed'; task: AgentTaskSpec; session: RuntimeSession; timestamp: string }
+  | {
+      type: 'backend_start'
+      task: AgentTaskSpec
+      session: RuntimeSession
+      backend: string
+      timestamp: string
+    }
+  | {
+      type: 'text_delta'
+      task?: AgentTaskSpec
+      session?: RuntimeSession
+      text: string
+      timestamp?: string
+    }
+  | {
+      type: 'reasoning_delta'
+      task?: AgentTaskSpec
+      session?: RuntimeSession
+      text: string
+      timestamp?: string
+    }
+  | {
+      type: 'tool_call'
+      task?: AgentTaskSpec
+      session?: RuntimeSession
+      toolName: string
+      toolCallId?: string
+      args?: unknown
+      timestamp?: string
+    }
+  | {
+      type: 'tool_result'
+      task?: AgentTaskSpec
+      session?: RuntimeSession
+      toolName: string
+      toolCallId?: string
+      result?: unknown
+      timestamp?: string
+    }
+  | {
+      type: 'llm_call'
+      task?: AgentTaskSpec
+      session?: RuntimeSession
+      model: string
+      tokensIn?: number
+      tokensOut?: number
+      costUsd?: number
+      latencyMs?: number
+      finishReason?: string
+      timestamp?: string
+    }
+  | {
+      type: 'artifact'
+      task?: AgentTaskSpec
+      session?: RuntimeSession
+      artifactId: string
+      name?: string
+      mimeType?: string
+      uri?: string
+      metadata?: Record<string, unknown>
+      timestamp?: string
+    }
+  | {
+      type: 'backend_error'
+      task: AgentTaskSpec
+      session?: RuntimeSession
+      backend: string
+      message: string
+      recoverable: boolean
+      timestamp: string
+    }
+  | {
+      type: 'backend_end'
+      task: AgentTaskSpec
+      session: RuntimeSession
+      backend: string
+      timestamp: string
+    }
+  | {
+      type: 'task_end'
+      task: AgentTaskSpec
+      status: AgentTaskStatus
+      reason: string
+      timestamp: string
+    }
+  | {
+      type: 'final'
+      task: AgentTaskSpec
+      session?: RuntimeSession
+      status: AgentTaskStatus
+      reason: string
+      text?: string
+      metadata?: Record<string, unknown>
+      timestamp: string
+    }
+
+/** Session record kept by a `RuntimeSessionStore`: id, backend, lifecycle status, and created/updated timestamp strings. @stable */
+export interface RuntimeSession {
+  id: string
+  backend: string
+  status: 'active' | 'completed' | 'failed' | 'aborted'
+  resumeToken?: string
+  createdAt: string
+  updatedAt: string
+  metadata?: Record<string, unknown>
+}
+
+/** Session persistence contract: `get`/`put` are required, event logging is optional, and every method may be sync or async. @stable */
+export interface RuntimeSessionStore {
+  get(sessionId: string): Promise<RuntimeSession | undefined> | RuntimeSession | undefined
+  put(session: RuntimeSession): Promise<void> | void
+  appendEvent?(sessionId: string, event: RuntimeStreamEvent): Promise<void> | void
+  listEvents?(sessionId: string): Promise<RuntimeStreamEvent[]> | RuntimeStreamEvent[]
+}
+
+/** Input handed to an execution backend: the task plus an optional single message, message list, and inputs. @stable */
+export interface AgentBackendInput {
+  task: AgentTaskSpec
+  message?: string
+  messages?: Array<{ role: string; content: string }>
+  inputs?: Record<string, unknown>
+}
+
+/** Context passed to `AgentExecutionBackend.stream`: task, readiness report, session, and an optional abort signal. @stable */
+export interface AgentBackendContext {
+  task: AgentTaskSpec
+  knowledge: KnowledgeReadinessReport
+  session: RuntimeSession
+  signal?: AbortSignal
+}
+
+/** Execution backend contract: `kind` and `stream` are required; `start`, `resume`, and `stop` are optional. @stable */
+export interface AgentExecutionBackend<TInput extends AgentBackendInput = AgentBackendInput> {
+  kind: string
+  start?(
+    input: TInput,
+    context: Omit<AgentBackendContext, 'session'> & { requestedSessionId?: string },
+  ): Promise<RuntimeSession> | RuntimeSession
+  resume?(
+    session: RuntimeSession,
+    input: TInput,
+    context: Omit<AgentBackendContext, 'session'>,
+  ): Promise<RuntimeSession> | RuntimeSession
+  stream(input: TInput, context: AgentBackendContext): AsyncIterable<RuntimeStreamEvent>
+  stop?(session: RuntimeSession, reason: string): Promise<void> | void
+}
+
+/** Options for running a task against an `AgentExecutionBackend`; only `task` and `backend` are required. @stable */
+export interface RunAgentTaskStreamOptions<TInput extends AgentBackendInput = AgentBackendInput> {
+  task: AgentTaskSpec
+  backend: AgentExecutionBackend<TInput>
+  input?: Omit<TInput, 'task'>
+  knowledge?: AgentKnowledgeProvider
+  sessionStore?: RuntimeSessionStore
+  sessionId?: string
+  resume?: boolean
+  signal?: AbortSignal
+  minimumReadinessScore?: number
+}
+
+/** Options for running a task through an `AgentAdapter`; only `task` and `adapter` are required. @stable */
+export interface RunAgentTaskOptions<
+  TState,
+  TAction,
+  TActionResult,
+  TEval extends ControlEvalResult = ControlEvalResult,
+> {
+  task: AgentTaskSpec
+  adapter: AgentAdapter<TState, TAction, TActionResult, TEval>
+  knowledge?: AgentKnowledgeProvider
+  onEvent?: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval>
+  store?: TraceStore
+  signal?: AbortSignal
+  scenarioId?: string
+  projectId?: string
+  variantId?: string
+  minimumReadinessScore?: number
+}
+
+/** Full result of an adapter-driven run: status, readiness, questions/plans with their outcomes, control result, and run records. @stable */
+export interface AgentTaskRunResult<
+  TState,
+  TAction,
+  TActionResult,
+  TEval extends ControlEvalResult = ControlEvalResult,
+> {
+  task: AgentTaskSpec
+  status: AgentTaskStatus
+  knowledge: KnowledgeReadinessReport
+  questions: UserQuestion[]
+  acquisitionPlans: DataAcquisitionPlan[]
+  userAnswers: Record<string, string>
+  acquiredEvidenceIds: string[]
+  control: ControlRunResult<TState, TAction, TActionResult, TEval>
+  runRecords: RunRecord[]
+}
+
+/** Flat summary of a task run: identifiers, status, readiness fields, counts, pass flag, wall time, and cost. @stable */
+export interface AgentTaskRunSummary {
+  taskId: string
+  domain?: string
+  status: AgentTaskStatus
+  reason: string
+  readinessStatus: KnowledgeReadinessDecision['status']
+  readinessScore: number
+  recommendedAction: KnowledgeReadinessReport['recommendedAction']
+  blockingGapIds: string[]
+  nonBlockingGapIds: string[]
+  questionCount: number
+  acquisitionPlanCount: number
+  acquiredEvidenceCount: number
+  controlStepCount: number
+  pass: boolean
+  failureClass?: string
+  wallMs: number
+  costUsd: number
+}
+
+/** Readiness verdict attached to `readiness_end` stream events as `decision`. @stable */
+export interface KnowledgeReadinessDecision {
+  passed: boolean
+  status: 'ready' | 'blocked' | 'caveat'
+  reason: string
+  readinessScore: number
+  recommendedAction: KnowledgeReadinessReport['recommendedAction']
+  severity: KnowledgeReadinessReport['severity']
+  blockingGapIds: string[]
+  nonBlockingGapIds: string[]
+}
diff --git a/tests/runtime-run.test.ts b/tests/runtime-run.test.ts
new file mode 100644
index 0000000..abd1168
--- /dev/null
+++ b/tests/runtime-run.test.ts
@@ -0,0 +1,249 @@
+import { describe, expect, it, vi } from 'vitest'
+import {
+  type AgentTaskSpec,
+  type RuntimeRunPersistenceAdapter,
+  type RuntimeRunRow,
+  RuntimeRunStateError,
+  type RuntimeStreamEvent,
+  startRuntimeRun,
+  ValidationError,
+} from '../src/index'
+
+const task: AgentTaskSpec = {
+  id: 'task-runtime-run', // asserted as `row.taskId` in the persist() test
+  intent: 'Review the latest filing',
+  domain: 'legal', // asserted as `row.domain` in the persist() test
+  metadata: { workspaceId: 'ws-1' },
+}
+
+function llmCall(
+  partial: Partial<Extract<RuntimeStreamEvent, { type: 'llm_call' }>>,
+): RuntimeStreamEvent {
+  const defaults = {
+    type: 'llm_call',
+    model: 'claude-sonnet-4-6',
+    timestamp: new Date(0).toISOString(), // epoch zero; any field in `partial` overrides these defaults
+  } as const
+  return { ...defaults, ...partial }
+}
+
+describe('startRuntimeRun', () => {
+  it('rejects missing workspaceId and missing taskSpec.id', () => {
+    expect(() => startRuntimeRun({ workspaceId: '', taskSpec: task })).toThrow(ValidationError)
+    expect(() => startRuntimeRun({ workspaceId: 'ws-1', taskSpec: { ...task, id: '' } })).toThrow(
+      ValidationError,
+    )
+  })
+
+  it('initializes with a stable id and defaults to running status', () => {
+    const handle = startRuntimeRun({
+      workspaceId: 'ws-1',
+      sessionId: 'thread-1',
+      taskSpec: task,
+      id: 'run-fixed',
+      now: () => 100,
+    })
+
+    expect(handle.id).toBe('run-fixed')
+    expect(handle.workspaceId).toBe('ws-1')
+    expect(handle.sessionId).toBe('thread-1')
+    expect(handle.status).toBe('running')
+    expect(handle.cost()).toEqual({
+      tokensIn: 0,
+      tokensOut: 0,
+      costUsd: 0,
+      wallMs: 0,
+      llmCalls: 0,
+    })
+  })
+
+  it('accumulates an llm cost ledger from llm_call events and exposes wall time', () => {
+    let clock = 1000
+    const handle = startRuntimeRun({
+      workspaceId: 'ws-1',
+      taskSpec: task,
+      id: 'cost-run',
+      now: () => clock,
+    })
+
+    clock = 1200
+    handle.observe(llmCall({ tokensIn: 100, tokensOut: 50, costUsd: 0.002 }))
+    clock = 1500
+    handle.observe(llmCall({ tokensIn: 80, tokensOut: 20, costUsd: 0.001 }))
+    clock = 1500 // unchanged: the tool_call below is observed at the same clock value
+    handle.observe(
+      // Non-cost events must not mutate the ledger.
+      {
+        type: 'tool_call',
+        toolName: 'shell',
+        timestamp: new Date(0).toISOString(),
+      },
+    )
+
+    const cost = handle.cost()
+    expect(cost.tokensIn).toBe(180)
+    expect(cost.tokensOut).toBe(70)
+    expect(cost.costUsd).toBeCloseTo(0.003, 9)
+    expect(cost.llmCalls).toBe(2)
+    expect(cost.wallMs).toBe(500) // injected clock at 1500 minus the 1000 it read at start
+  })
+
+  it('ignores non-finite numbers on llm_call events without polluting the ledger', () => {
+    const handle = startRuntimeRun({
+      workspaceId: 'ws-1',
+      taskSpec: task,
+      id: 'guard-run',
+      now: () => 0,
+    })
+    handle.observe(
+      llmCall({ tokensIn: Number.NaN, tokensOut: Number.POSITIVE_INFINITY, costUsd: 0.5 }),
+    )
+    const cost = handle.cost()
+    expect(cost.tokensIn).toBe(0)
+    expect(cost.tokensOut).toBe(0)
+    expect(cost.costUsd).toBe(0.5) // the finite field on the same event is still counted
+    expect(cost.llmCalls).toBe(1)
+  })
+
+  it('completes idempotently with the same status and freezes wallMs at completion', () => {
+    let clock = 100
+    const handle = startRuntimeRun({
+      workspaceId: 'ws-1',
+      taskSpec: task,
+      id: 'idempotent-run',
+      now: () => clock,
+    })
+    clock = 350
+    handle.complete({ status: 'completed', resultSummary: 'ok' })
+    expect(handle.status).toBe('completed')
+    const firstCost = handle.cost()
+
+    clock = 9999
+    // Same terminal status is a no-op (does not throw, does not update wallMs).
+    handle.complete({ status: 'completed', resultSummary: 'ok-again' })
+    expect(handle.status).toBe('completed')
+    const secondCost = handle.cost()
+    expect(firstCost.wallMs).toBe(250) // clock 350 at the first complete() minus 100 at start
+    expect(secondCost.wallMs).toBe(250)
+  })
+
+  it('refuses to transition between two different terminal statuses', () => {
+    const handle = startRuntimeRun({ workspaceId: 'ws-1', taskSpec: task, id: 'no-time-travel' })
+    handle.complete({ status: 'completed', resultSummary: 'done' })
+    expect(() => handle.complete({ status: 'failed', error: 'too late' })).toThrow(
+      RuntimeRunStateError,
+    )
+  })
+
+  it('persist() refuses to run before complete()', async () => {
+    const handle = startRuntimeRun({ workspaceId: 'ws-1', taskSpec: task, id: 'must-complete' })
+    await expect(handle.persist()).rejects.toBeInstanceOf(RuntimeRunStateError)
+  })
+
+  it('persist() writes the canonical row to the adapter', async () => {
+    const rows: RuntimeRunRow[] = []
+    const adapter: RuntimeRunPersistenceAdapter = {
+      upsert(row) {
+        rows.push(row)
+      },
+    }
+    let clock = 0
+    const handle = startRuntimeRun({
+      workspaceId: 'ws-1',
+      sessionId: 'thread-1',
+      agentId: 'legal-chat-runtime',
+      taskSpec: task,
+      adapter,
+      id: 'persist-run',
+      scenarioId: 'legal-chat:thread-1',
+      now: () => clock,
+    })
+    clock = 50
+    handle.observe(llmCall({ tokensIn: 100, tokensOut: 50, costUsd: 0.001 }))
+    clock = 300
+    handle.complete({
+      status: 'completed',
+      resultSummary: 'reviewed',
+      metadata: { threadId: 'thread-1' },
+    })
+    await handle.persist({ runtimeEvents: [{ type: 'final' }] })
+
+    expect(rows).toHaveLength(1)
+    const row = rows[0]!
+    expect(row.id).toBe('persist-run')
+    expect(row.workspaceId).toBe('ws-1')
+    expect(row.sessionId).toBe('thread-1')
+    expect(row.agentId).toBe('legal-chat-runtime')
+    expect(row.domain).toBe('legal')
+    expect(row.taskId).toBe('task-runtime-run')
+    expect(row.scenarioId).toBe('legal-chat:thread-1')
+    expect(row.status).toBe('completed')
+    expect(row.resultSummary).toBe('reviewed')
+    expect(row.cost.costUsd).toBeCloseTo(0.001, 9)
+    expect(row.cost.tokensIn).toBe(100)
+    expect(row.cost.tokensOut).toBe(50)
+    expect(row.cost.llmCalls).toBe(1)
+    expect(row.cost.wallMs).toBe(300) // clock 300 at complete() minus 0 at start
+    expect(row.completedAt).toBe(new Date(300).toISOString())
+    expect(row.metadata).toEqual({ threadId: 'thread-1', runtimeEvents: [{ type: 'final' }] })
+  })
+
+  it('persist() propagates adapter errors so callers can decide whether to retry', async () => {
+    const adapter: RuntimeRunPersistenceAdapter = {
+      upsert: vi.fn(() => {
+        throw new Error('postgres timeout')
+      }),
+    }
+    const handle = startRuntimeRun({
+      workspaceId: 'ws-1',
+      taskSpec: task,
+      adapter,
+      id: 'persist-fail',
+    })
+    handle.complete({ status: 'failed', error: 'sandbox dropped' })
+    await expect(handle.persist()).rejects.toThrow('postgres timeout')
+  })
+
+  it('persist() is a no-op when no adapter is configured', async () => {
+    const handle = startRuntimeRun({ workspaceId: 'ws-1', taskSpec: task, id: 'no-adapter' })
+    handle.complete({ status: 'completed', resultSummary: 'ok' })
+    await expect(handle.persist()).resolves.toBeUndefined()
+  })
+
+  it('toRow() returns a row matching what persist() would write', async () => {
+    const handle = startRuntimeRun({
+      workspaceId: 'ws-1',
+      taskSpec: task,
+      id: 'dry-run',
+      now: () => 0,
+    })
+    handle.observe(llmCall({ tokensIn: 10, tokensOut: 5, costUsd: 0.0001 }))
+    handle.complete({ status: 'completed', resultSummary: 'dry' })
+    const row = handle.toRow({ extra: 'value' })
+    expect(row.id).toBe('dry-run')
+    expect(row.status).toBe('completed')
+    expect(row.metadata).toEqual({ extra: 'value' })
+    expect(row.cost.tokensIn).toBe(10)
+  })
+
+  it('complete() with an explicit cost override replaces the accumulated ledger', () => {
+    const handle = startRuntimeRun({
+      workspaceId: 'ws-1',
+      taskSpec: task,
+      id: 'cost-override',
+      now: () => 0,
+    })
+    handle.observe(llmCall({ tokensIn: 9999, tokensOut: 9999, costUsd: 9.99 }))
+    handle.complete({
+      status: 'completed',
+      resultSummary: 'reconciled',
+      cost: { tokensIn: 100, tokensOut: 50, costUsd: 0.01, llmCalls: 1 },
+    })
+    expect(handle.cost()).toMatchObject({
+      tokensIn: 100,
+      tokensOut: 50,
+      costUsd: 0.01,
+      llmCalls: 1,
+    })
+  })
+})
diff --git a/tests/runtime.test.ts b/tests/runtime.test.ts
index 72e8da6..5a3a8a5 100644
--- a/tests/runtime.test.ts
+++ b/tests/runtime.test.ts
@@ -1,26 +1,26 @@
 import { describe, expect, it } from 'vitest'
 import {
+  type AgentAdapter,
+  type AgentBackendInput,
+  type AgentExecutionBackend,
+  type AgentTaskSpec,
+  type ControlEvalResult,
   createIterableBackend,
   createOpenAICompatibleBackend,
-  createSandboxPromptBackend,
   createRuntimeEventCollector,
   createRuntimeStreamEventCollector,
+  createSandboxPromptBackend,
   decideKnowledgeReadiness,
   encodeServerSentEvent,
   InMemoryRuntimeSessionStore,
+  type KnowledgeRequirement,
+  type RuntimeStreamEvent,
   readinessServerSentEvent,
   runAgentTask,
   runAgentTaskStream,
   sanitizeAgentRuntimeEvent,
   sanitizeRuntimeStreamEvent,
   summarizeAgentTaskRun,
-  type AgentAdapter,
-  type AgentExecutionBackend,
-  type AgentTaskSpec,
-  type ControlEvalResult,
-  type KnowledgeRequirement,
-  type RuntimeStreamEvent,
-  type AgentBackendInput,
 } from '../src/index'
 
 interface State {
@@ -48,16 +48,19 @@ function adapter(): AgentAdapter<State, Action, State, ControlEvalResult> {
   let current: State = { count: 0 }
   return {
     observe: () => current,
-    validate: ({ state }) => [{
-      id: 'count-ready',
-      passed: state.count >= 1,
-      score: state.count >= 1 ? 1 : 0,
-      severity: 'info',
-      objective: true,
-    }],
-    decide: ({ state }) => state.count >= 1
-      ? { type: 'stop', pass: true, score: 1, reason: 'done' }
-      : { type: 'continue', action: { type: 'increment' }, reason: 'need one step' },
+    validate: ({ state }) => [
+      {
+        id: 'count-ready',
+        passed: state.count >= 1,
+        score: state.count >= 1 ? 1 : 0,
+        severity: 'info',
+        objective: true,
+      },
+    ],
+    decide: ({ state }) =>
+      state.count >= 1
+        ? { type: 'stop', pass: true, score: 1, reason: 'done' }
+        : { type: 'continue', action: { type: 'increment' }, reason: 'need one step' },
     act: () => {
       current = { count: 1 }
       return current
@@ -94,15 +97,17 @@ describe('runAgentTask', () => {
       id: 'task-2',
       intent: 'deploy',
       domain: 'legal',
-      requiredKnowledge: [{
-        ...readyReq,
-        id: 'customer-secret',
-        description: 'Customer credential',
-        category: 'credential_or_secret',
-        acquisitionMode: 'ask_user',
-        sensitivity: 'secret',
-        currentConfidence: 0,
-      }],
+      requiredKnowledge: [
+        {
+          ...readyReq,
+          id: 'customer-secret',
+          description: 'Customer credential',
+          category: 'credential_or_secret',
+          acquisitionMode: 'ask_user',
+          sensitivity: 'secret',
+          currentConfidence: 0,
+        },
+      ],
       budget: { maxSteps: 3 },
     }
     let acted = false
@@ -197,13 +202,15 @@ describe('runAgentTask', () => {
     expect(result.userAnswers.question_build).toContain('pnpm')
     expect(result.acquiredEvidenceIds).toEqual(['page:build'])
     expect(result.knowledge.readinessScore).toBe(1)
-    expect(events).toEqual(expect.arrayContaining([
-      'questions_start',
-      'questions_end',
-      'acquisition_start',
-      'acquisition_end',
-      'control_step',
-    ]))
+    expect(events).toEqual(
+      expect.arrayContaining([
+        'questions_start',
+        'questions_end',
+        'acquisition_start',
+        'acquisition_end',
+        'control_step',
+      ]),
+    )
     expect(events.filter((event) => event === 'questions_end')).toHaveLength(1)
   })
 
@@ -213,15 +220,17 @@ describe('runAgentTask', () => {
       intent: 'collect secret then run',
       domain: 'test',
       inputs: { apiKey: 'sk-secret' },
-      requiredKnowledge: [{
-        ...readyReq,
-        id: 'api-key',
-        description: 'Customer API key',
-        category: 'credential_or_secret',
-        acquisitionMode: 'ask_user',
-        sensitivity: 'secret',
-        currentConfidence: 0,
-      }],
+      requiredKnowledge: [
+        {
+          ...readyReq,
+          id: 'api-key',
+          description: 'Customer API key',
+          category: 'credential_or_secret',
+          acquisitionMode: 'ask_user',
+          sensitivity: 'secret',
+          currentConfidence: 0,
+        },
+      ],
       budget: { maxSteps: 3 },
     }
     const collector = createRuntimeEventCollector()
@@ -272,15 +281,17 @@ describe('runAgentTask', () => {
         inputs: { customer: 'Acme' },
         requiredKnowledge: [readyReq],
       },
-      questions: [{
-        id: 'q1',
-        question: 'Please provide: Build command',
-        reason: 'Required for test.',
-        requirementId: 'build-command',
-        importance: 'blocking' as const,
-        answerType: 'free_text' as const,
-        impactIfUnknown: 'The agent should not run until this is known.',
-      }],
+      questions: [
+        {
+          id: 'q1',
+          question: 'Please provide: Build command',
+          reason: 'Required for test.',
+          requirementId: 'build-command',
+          importance: 'blocking' as const,
+          answerType: 'free_text' as const,
+          impactIfUnknown: 'The agent should not run until this is known.',
+        },
+      ],
       userAnswers: { q1: 'pnpm test' },
     }
 
@@ -312,17 +323,23 @@ describe('runAgentTask', () => {
     const caveatTask: AgentTaskSpec = {
       id: 'task-9',
       intent: 'caveat',
-      requiredKnowledge: [{
-        ...readyReq,
-        importance: 'medium',
-        currentConfidence: 0.2,
-        fallbackPolicy: 'continue_with_caveat',
-      }],
+      requiredKnowledge: [
+        {
+          ...readyReq,
+          importance: 'medium',
+          currentConfidence: 0.2,
+          fallbackPolicy: 'continue_with_caveat',
+        },
+      ],
     }
 
     const ready = await runAgentTask({ task: readyTask, adapter: adapter() })
     const blocked = await runAgentTask({ task: blockedTask, adapter: adapter() })
-    const caveat = await runAgentTask({ task: caveatTask, adapter: adapter(), minimumReadinessScore: 0 })
+    const caveat = await runAgentTask({
+      task: caveatTask,
+      adapter: adapter(),
+      minimumReadinessScore: 0,
+    })
 
     expect(decideKnowledgeReadiness(ready.knowledge).status).toBe('ready')
     expect(decideKnowledgeReadiness(blocked.knowledge).status).toBe('blocked')
@@ -330,9 +347,9 @@ describe('runAgentTask', () => {
   })
 
   it('encodes safe server-sent events for runtime telemetry streams', async () => {
-    expect(encodeServerSentEvent({ type: 'ping' }, { event: 'runtime\nbad', id: 'id\n1', retry: 1000 })).toBe(
-      'id: id 1\nevent: runtime bad\nretry: 1000\ndata: {"type":"ping"}\n\n',
-    )
+    expect(
+      encodeServerSentEvent({ type: 'ping' }, { event: 'runtime\nbad', id: 'id\n1', retry: 1000 }),
+    ).toBe('id: id 1\nevent: runtime bad\nretry: 1000\ndata: {"type":"ping"}\n\n')
     expect(encodeServerSentEvent('line one\nline two')).toBe('data: line one\ndata: line two\n\n')
   })
 
@@ -345,7 +362,9 @@ describe('runAgentTask', () => {
       },
       adapter: adapter(),
     })
-    const event = readinessServerSentEvent(result.knowledge, { includeRequirementDescriptions: true })
+    const event = readinessServerSentEvent(result.knowledge, {
+      includeRequirementDescriptions: true,
+    })
     const namedEvent = readinessServerSentEvent(result.knowledge, { event: 'readiness' })
 
     expect(event).not.toContain('event:')
@@ -412,21 +431,25 @@ describe('runAgentTask', () => {
     }
     const task = { id: 'stream-ready', intent: 'continue coding', requiredKnowledge: [readyReq] }
 
-    const first = await collect(runAgentTaskStream({
-      task,
-      backend,
-      input: { message: 'hello' },
-      sessionStore: store,
-      sessionId: 'session-1',
-    }))
-    const second = await collect(runAgentTaskStream({
-      task,
-      backend,
-      input: { message: ' again' },
-      sessionStore: store,
-      sessionId: 'session-1',
-      resume: true,
-    }))
+    const first = await collect(
+      runAgentTaskStream({
+        task,
+        backend,
+        input: { message: 'hello' },
+        sessionStore: store,
+        sessionId: 'session-1',
+      }),
+    )
+    const second = await collect(
+      runAgentTaskStream({
+        task,
+        backend,
+        input: { message: ' again' },
+        sessionStore: store,
+        sessionId: 'session-1',
+        resume: true,
+      }),
+    )
 
     expect(first.find((event) => event.type === 'session_created')).toBeDefined()
     expect(second.find((event) => event.type === 'session_resumed')).toBeDefined()
@@ -436,7 +459,9 @@ describe('runAgentTask', () => {
 
     const toolCall = first.find((event) => event.type === 'tool_call')!
     expect(JSON.stringify(sanitizeRuntimeStreamEvent(toolCall))).not.toContain('secret.ts')
-    expect(JSON.stringify(sanitizeRuntimeStreamEvent(toolCall, { includeControlPayloads: true }))).toContain('secret.ts')
+    expect(
+      JSON.stringify(sanitizeRuntimeStreamEvent(toolCall, { includeControlPayloads: true })),
+    ).toContain('secret.ts')
   })
 
   it('maps sandbox prompt events into runtime stream events', async () => {
@@ -449,18 +474,25 @@ describe('runAgentTask', () => {
         yield { type: 'tool_result', data: { name: 'Read', output: 'ok' } }
       },
     })
-    const events = await collect(runAgentTaskStream({
-      task: { id: 'sandbox-task', intent: 'inspect', requiredKnowledge: [readyReq] },
-      backend,
-      input: { message: 'go' },
-    }))
+    const events = await collect(
+      runAgentTaskStream({
+        task: { id: 'sandbox-task', intent: 'inspect', requiredKnowledge: [readyReq] },
+        backend,
+        input: { message: 'go' },
+      }),
+    )
 
     expect(events.find((event) => event.type === 'session_created')).toMatchObject({
       type: 'session_created',
       session: { id: 'box-1', backend: 'sandbox' },
     })
-    expect(events.filter((event) => event.type === 'text_delta').map((event) => event.text)).toEqual(['hi'])
-    expect(events.find((event) => event.type === 'tool_call')).toMatchObject({ type: 'tool_call', toolName: 'Read' })
+    expect(
+      events.filter((event) => event.type === 'text_delta').map((event) => event.text),
+    ).toEqual(['hi'])
+    expect(events.find((event) => event.type === 'tool_call')).toMatchObject({
+      type: 'tool_call',
+      toolName: 'Read',
+    })
   })
 
   it('parses OpenAI-compatible streamed chat completions', async () => {
@@ -468,20 +500,28 @@ describe('runAgentTask', () => {
       apiKey: 'sk-test',
       baseUrl: 'https://router.example/v1',
       model: 'model-a',
-      fetchImpl: async () => new Response(
-        'data: {"choices":[{"delta":{"content":"hel"}}]}\n\n'
-        + 'data: {"choices":[{"delta":{"content":"lo"}}]}\n\n'
-        + 'data: [DONE]\n\n',
-        { status: 200 },
-      ),
+      fetchImpl: async () =>
+        new Response(
+          'data: {"choices":[{"delta":{"content":"hel"}}]}\n\n' +
+            'data: {"choices":[{"delta":{"content":"lo"}}]}\n\n' +
+            'data: [DONE]\n\n',
+          { status: 200 },
+        ),
     })
-    const events = await collect(runAgentTaskStream({
-      task: { id: 'chat-task', intent: 'say hello', requiredKnowledge: [readyReq] },
-      backend,
-      input: { message: 'hello' },
-    }))
+    const events = await collect(
+      runAgentTaskStream({
+        task: { id: 'chat-task', intent: 'say hello', requiredKnowledge: [readyReq] },
+        backend,
+        input: { message: 'hello' },
+      }),
+    )
 
-    expect(events.filter((event) => event.type === 'text_delta').map((event) => event.text).join('')).toBe('hello')
+    expect(
+      events
+        .filter((event) => event.type === 'text_delta')
+        .map((event) => event.text)
+        .join(''),
+    ).toBe('hello')
     expect(events.at(-1)).toMatchObject({ type: 'final', status: 'completed', text: 'hello' })
   })
 
@@ -498,16 +538,21 @@ describe('runAgentTask', () => {
         throw new Error('sandbox lost')
       },
     }
-    const events = await collect(runAgentTaskStream({
-      task: { id: 'failing-task', intent: 'run', requiredKnowledge: [readyReq] },
-      backend,
-      sessionStore: store,
-      sessionId: 'failing-session',
-    }))
+    const events = await collect(
+      runAgentTaskStream({
+        task: { id: 'failing-task', intent: 'run', requiredKnowledge: [readyReq] },
+        backend,
+        sessionStore: store,
+        sessionId: 'failing-session',
+      }),
+    )
 
     expect(stopped).toEqual(['sandbox lost'])
     expect(store.get('failing-session')?.status).toBe('failed')
-    expect(store.listEvents('failing-session').at(-1)).toMatchObject({ type: 'final', status: 'failed' })
+    expect(store.listEvents('failing-session').at(-1)).toMatchObject({
+      type: 'final',
+      status: 'failed',
+    })
     expect(events.find((event) => event.type === 'backend_error')).toMatchObject({
       type: 'backend_error',
       backend: 'failing-harness',
@@ -522,10 +567,40 @@ describe('runAgentTask', () => {
     const backend = createIterableBackend<AgentBackendInput>({
       kind: 'fake-stream',
       async *stream(_input, ctx) {
-        yield { type: 'tool_call', task: ctx.task, session: ctx.session, toolName: 'shell', args: { cmd: 'rm -rf /etc/secret.txt' }, timestamp: '2026-05-10T00:00:00.000Z' }
-        yield { type: 'tool_result', task: ctx.task, session: ctx.session, toolName: 'shell', result: { stdout: 'sk-leaked' }, timestamp: '2026-05-10T00:00:00.000Z' }
-        yield { type: 'artifact', task: ctx.task, session: ctx.session, artifactId: 'a1', name: 'report.json', mimeType: 'application/json', uri: 's3://internal/secret-bucket/key', metadata: { customerId: 'cust-99' }, timestamp: '2026-05-10T00:00:00.000Z' }
-        yield { type: 'text_delta', task: ctx.task, session: ctx.session, text: 'hi from agent', timestamp: '2026-05-10T00:00:00.000Z' }
+        yield {
+          type: 'tool_call',
+          task: ctx.task,
+          session: ctx.session,
+          toolName: 'shell',
+          args: { cmd: 'rm -rf /etc/secret.txt' },
+          timestamp: '2026-05-10T00:00:00.000Z',
+        }
+        yield {
+          type: 'tool_result',
+          task: ctx.task,
+          session: ctx.session,
+          toolName: 'shell',
+          result: { stdout: 'sk-leaked' },
+          timestamp: '2026-05-10T00:00:00.000Z',
+        }
+        yield {
+          type: 'artifact',
+          task: ctx.task,
+          session: ctx.session,
+          artifactId: 'a1',
+          name: 'report.json',
+          mimeType: 'application/json',
+          uri: 's3://internal/secret-bucket/key',
+          metadata: { customerId: 'cust-99' },
+          timestamp: '2026-05-10T00:00:00.000Z',
+        }
+        yield {
+          type: 'text_delta',
+          task: ctx.task,
+          session: ctx.session,
+          text: 'hi from agent',
+          timestamp: '2026-05-10T00:00:00.000Z',
+        }
       },
     })
     const task: AgentTaskSpec = {
@@ -566,8 +641,24 @@ describe('runAgentTask', () => {
     const backend = createIterableBackend<AgentBackendInput>({
       kind: 'fake-stream',
       async *stream(_input, ctx) {
-        yield { type: 'tool_call', task: ctx.task, session: ctx.session, toolName: 'shell', args: { cmd: 'pnpm test' }, timestamp: '2026-05-10T00:00:00.000Z' }
-        yield { type: 'artifact', task: ctx.task, session: ctx.session, artifactId: 'a1', name: 'r.json', uri: 's3://bucket/key', metadata: { customerId: 'cust-1' }, timestamp: '2026-05-10T00:00:00.000Z' }
+        yield {
+          type: 'tool_call',
+          task: ctx.task,
+          session: ctx.session,
+          toolName: 'shell',
+          args: { cmd: 'pnpm test' },
+          timestamp: '2026-05-10T00:00:00.000Z',
+        }
+        yield {
+          type: 'artifact',
+          task: ctx.task,
+          session: ctx.session,
+          artifactId: 'a1',
+          name: 'r.json',
+          uri: 's3://bucket/key',
+          metadata: { customerId: 'cust-1' },
+          timestamp: '2026-05-10T00:00:00.000Z',
+        }
       },
     })
     const task: AgentTaskSpec = {
@@ -591,7 +682,13 @@ describe('runAgentTask', () => {
     const backend = createIterableBackend<AgentBackendInput>({
       kind: 'fake-stream',
       async *stream(_input, ctx) {
-        yield { type: 'text_delta', task: ctx.task, session: ctx.session, text: 'partial', timestamp: '2026-05-10T00:00:00.000Z' }
+        yield {
+          type: 'text_delta',
+          task: ctx.task,
+          session: ctx.session,
+          text: 'partial',
+          timestamp: '2026-05-10T00:00:00.000Z',
+        }
       },
     })
     for await (const event of runAgentTaskStream({
@@ -619,14 +716,20 @@ describe('runAgentTask', () => {
       stop: () => {
         throw new Error('cleanup refused')
       },
+      // Regression test: a stream generator that throws before any yield
+      // exercises the runtime's empty-event cleanup path (backend_error /
+      // task_end / final must still flow).
+      // biome-ignore lint/correctness/useYield: see comment above
       async *stream() {
         throw new Error('primary stream failure')
       },
     }
-    const events = await collect(runAgentTaskStream({
-      task: { id: 'cleanup-failure-task', intent: 'run', requiredKnowledge: [readyReq] },
-      backend,
-    }))
+    const events = await collect(
+      runAgentTaskStream({
+        task: { id: 'cleanup-failure-task', intent: 'run', requiredKnowledge: [readyReq] },
+        backend,
+      }),
+    )
 
     expect(events.find((event) => event.type === 'backend_error')).toMatchObject({
       type: 'backend_error',
diff --git a/tests/trace-bridge.test.ts b/tests/trace-bridge.test.ts
new file mode 100644
index 0000000..7ccb7ae
--- /dev/null
+++ b/tests/trace-bridge.test.ts
@@ -0,0 +1,208 @@
+import { describe, expect, it } from 'vitest'
+import {
+  createTraceBridge,
+  type RuntimeStreamEvent,
+  toAgentEvalTrace,
+  ValidationError,
+} from '../src/index'
+
+const task = { id: 'task-1', intent: 'Run a chat turn', domain: 'legal' } // task fixture shared by the cases below
+const session = {
+  id: 'thread-1',
+  backend: 'tcloud',
+  status: 'active' as const, // `as const` keeps the literal type instead of widening to string
+  createdAt: '2026-05-10T00:00:00.000Z',
+  updatedAt: '2026-05-10T00:00:00.000Z',
+}
+
+describe('createTraceBridge', () => {
+  it('rejects construction without a runId', () => {
+    expect(() => createTraceBridge({ runId: '' })).toThrow(ValidationError) // '' is the "missing" case exercised here
+  })
+
+  it('maps lifecycle events to log-kind trace events with the runId stamped', () => {
+    const when = '2026-05-10T00:00:00.000Z'
+    const bridge = createTraceBridge({ runId: 'run-1', spanId: 'span-1' })
+    const trace = bridge.toTraceEvent({ type: 'task_start', task, timestamp: when })
+    expect(trace).toMatchObject({
+      runId: 'run-1',
+      spanId: 'span-1',
+      kind: 'log',
+      payload: { phase: 'task_start', taskId: 'task-1' },
+    })
+    expect(trace?.timestamp).toBe(Date.parse(when)) // ISO string in, epoch milliseconds out
+  })
+
+  it('maps readiness_end to policy_violation when the decision is blocked', () => {
+    const bridge = createTraceBridge({ runId: 'run-2' })
+    const trace = bridge.toTraceEvent({
+      type: 'readiness_end',
+      task,
+      timestamp: '2026-05-10T00:00:00.000Z',
+      knowledge: {
+        taskId: 'task-1',
+        readinessScore: 0,
+        reason: 'missing',
+        severity: 'error',
+        recommendedAction: 'collect_missing_data',
+        blockingMissingRequirements: [], // NOTE(review): empty although decision.blockingGapIds is not
+        nonBlockingGaps: [],
+        bundle: {
+          taskId: 'task-1',
+          readinessScore: 0,
+          missing: [],
+          evidence: [],
+          evidenceIds: [],
+          userAnswers: {},
+        },
+      },
+      decision: {
+        passed: false,
+        status: 'blocked', // the blocked case named in the test title
+        reason: 'missing',
+        readinessScore: 0,
+        recommendedAction: 'collect_missing_data',
+        severity: 'error',
+        blockingGapIds: ['missing-doc'], // asserted below to appear in the trace payload
+        nonBlockingGapIds: [],
+      },
+    })
+    expect(trace?.kind).toBe('policy_violation')
+    expect(trace?.payload).toMatchObject({
+      status: 'blocked',
+      blockingGapIds: ['missing-doc'],
+    })
+  })
+
+  it('maps backend_error and failed task_end to error kind', () => {
+    const timestamp = '2026-05-10T00:00:00.000Z'
+    const bridge = createTraceBridge({ runId: 'run-3' })
+    const errorTrace = bridge.toTraceEvent({
+      type: 'backend_error',
+      task,
+      session,
+      backend: 'tcloud',
+      message: 'sandbox lost',
+      recoverable: true,
+      timestamp,
+    })
+    expect(errorTrace?.kind).toBe('error')
+    expect(errorTrace?.payload).toMatchObject({ message: 'sandbox lost', recoverable: true })
+    const failureTrace = bridge.toTraceEvent({
+      type: 'task_end',
+      task,
+      status: 'failed',
+      reason: 'sandbox lost',
+      timestamp,
+    })
+    expect(failureTrace?.kind).toBe('error') // a failed task_end is reported as an error too
+  })
+
+  it('drops text_delta and reasoning_delta — they belong inside an LlmSpan', () => {
+    const bridge = createTraceBridge({ runId: 'run-4' })
+    // Each delta is projected on its own and checked before the next one is fed in.
+    const fromText = bridge.toTraceEvent({
+      type: 'text_delta',
+      task,
+      session,
+      text: 'hi',
+      timestamp: '2026-05-10T00:00:00.000Z',
+    })
+    expect(fromText).toBeUndefined()
+    // Reasoning deltas are expected to be dropped exactly like visible text deltas.
+    const fromReasoning = bridge.toTraceEvent({
+      type: 'reasoning_delta',
+      task,
+      session,
+      text: 'thinking',
+      timestamp: '2026-05-10T00:00:00.000Z',
+    })
+    expect(fromReasoning).toBeUndefined()
+  })
+
+  it('maps llm_call into a log-kind trace event carrying tokens + cost', () => {
+    const bridge = createTraceBridge({ runId: 'run-5' })
+    const trace = bridge.toTraceEvent({
+      type: 'llm_call',
+      task,
+      session,
+      model: 'claude-sonnet-4-6',
+      tokensIn: 100,
+      tokensOut: 50,
+      costUsd: 0.001,
+      latencyMs: 320, // supplied on the event but not asserted on the payload below
+      finishReason: 'stop', // supplied on the event but not asserted on the payload below
+      timestamp: '2026-05-10T00:00:00.000Z',
+    })
+    expect(trace?.kind).toBe('log')
+    expect(trace?.payload).toMatchObject({
+      phase: 'llm_call',
+      model: 'claude-sonnet-4-6',
+      tokensIn: 100,
+      tokensOut: 50,
+      costUsd: 0.001,
+    })
+  })
+
+  it('drain() projects a full stream into trace events in order', () => {
+    const bridge = createTraceBridge({ runId: 'run-6' })
+    const events: RuntimeStreamEvent[] = [
+      { type: 'task_start', task, timestamp: '2026-05-10T00:00:00.000Z' },
+      { type: 'readiness_start', task, timestamp: '2026-05-10T00:00:01.000Z' },
+      {
+        type: 'text_delta',
+        task,
+        session,
+        text: 'dropped', // this event is expected to be absent from the drained output
+        timestamp: '2026-05-10T00:00:02.000Z',
+      },
+      {
+        type: 'task_end',
+        task,
+        status: 'completed',
+        reason: 'ok',
+        timestamp: '2026-05-10T00:00:03.000Z',
+      },
+    ]
+    const traces = bridge.drain(events) // four events in, three traces expected out
+    expect(traces.map((trace) => trace.payload.phase)).toEqual([
+      'task_start',
+      'readiness_start',
+      'task_end',
+    ])
+    expect(traces.map((trace) => trace.eventId)).toEqual(['evt-1', 'evt-2', 'evt-3']) // no id gap for the dropped event
+  })
+
+  it('toAgentEvalTrace() one-shot matches createTraceBridge.toTraceEvent()', () => {
+    const timestamp = '2026-05-10T00:00:00.000Z'
+    const event: RuntimeStreamEvent = { type: 'task_start', task, timestamp }
+    const oneShot = toAgentEvalTrace(event, { runId: 'run-7' })
+    expect(oneShot).toMatchObject({ runId: 'run-7', kind: 'log' })
+    // Compare against a bridge built with the same options, as the title promises.
+    const bridged = createTraceBridge({ runId: 'run-7' }).toTraceEvent(event)
+    expect(oneShot).toMatchObject({ kind: bridged?.kind, payload: bridged?.payload })
+  })
+
+  it('falls back to Date.now() when an event lacks a timestamp', () => {
+    const bridge = createTraceBridge({ runId: 'run-8' })
+    const before = Date.now()
+    const trace = bridge.toTraceEvent({
+      type: 'text_delta',
+      task,
+      session,
+      text: 'untimed',
+    })
+    expect(trace).toBeUndefined() // still dropped when the timestamp is missing
+
+    // tool_call may legitimately arrive without a timestamp; it must still yield a trace.
+    const toolCall = bridge.toTraceEvent({
+      type: 'tool_call',
+      task,
+      session,
+      toolName: 'shell',
+    })
+    expect(toolCall).toBeDefined()
+    expect(toolCall!.timestamp).toBeGreaterThanOrEqual(before)
+    expect(toolCall!.timestamp).toBeLessThanOrEqual(Date.now()) // upper bound rejects Infinity / future values
+  })
+})
diff --git a/tsconfig.json b/tsconfig.json
index 51a8087..a8b383f 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -16,7 +16,8 @@
     "isolatedModules": true,
     "noUnusedLocals": true,
     "noUnusedParameters": true,
-    "noFallthroughCasesInSwitch": true
+    "noFallthroughCasesInSwitch": true,
+    "noUncheckedIndexedAccess": true
   },
   "include": ["src"],
   "exclude": ["node_modules", "dist", "tests"]