From c7e58078d5470c31c480889d5824d451378d4e20 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Fri, 17 Apr 2026 18:59:18 -0700 Subject: [PATCH 01/31] Freebuff waiting room backend --- docs/freebuff-waiting-room.md | 282 ++ .../db/migrations/0043_vengeful_boomer.sql | 15 + .../src/db/migrations/meta/0043_snapshot.json | 3202 +++++++++++++++++ web/instrumentation.ts | 12 +- .../completions/__tests__/completions.test.ts | 31 +- web/src/app/api/v1/chat/completions/_post.ts | 37 + .../session/__tests__/session.test.ts | 131 + .../app/api/v1/freebuff/session/_handlers.ts | 98 + web/src/app/api/v1/freebuff/session/route.ts | 22 + web/src/llm-api/types.ts | 9 +- .../free-session/__tests__/admission.test.ts | 94 + .../free-session/__tests__/public-api.test.ts | 293 ++ .../__tests__/session-view.test.ts | 110 + web/src/server/free-session/admission.ts | 175 + web/src/server/free-session/config.ts | 29 + web/src/server/free-session/public-api.ts | 184 + web/src/server/free-session/session-view.ts | 66 + web/src/server/free-session/store.ts | 231 ++ web/src/server/free-session/types.ts | 36 + 19 files changed, 5053 insertions(+), 4 deletions(-) create mode 100644 docs/freebuff-waiting-room.md create mode 100644 packages/internal/src/db/migrations/0043_vengeful_boomer.sql create mode 100644 packages/internal/src/db/migrations/meta/0043_snapshot.json create mode 100644 web/src/app/api/v1/freebuff/session/__tests__/session.test.ts create mode 100644 web/src/app/api/v1/freebuff/session/_handlers.ts create mode 100644 web/src/app/api/v1/freebuff/session/route.ts create mode 100644 web/src/server/free-session/__tests__/admission.test.ts create mode 100644 web/src/server/free-session/__tests__/public-api.test.ts create mode 100644 web/src/server/free-session/__tests__/session-view.test.ts create mode 100644 web/src/server/free-session/admission.ts create mode 100644 web/src/server/free-session/config.ts create mode 100644 web/src/server/free-session/public-api.ts create mode 100644 web/src/server/free-session/session-view.ts create mode 100644 web/src/server/free-session/store.ts create mode 100644 web/src/server/free-session/types.ts diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md new file mode 100644 index 0000000000..73ebe79b65 --- /dev/null +++ b/docs/freebuff-waiting-room.md @@ -0,0 +1,282 @@ +# Freebuff Waiting Room + +## Overview + +The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployment. It has three jobs: + +1. **Bound concurrency** — cap the number of simultaneously-active free users so one deployment does not degrade under load. +2. **Gate on upstream health** — only admit new users while the Fireworks deployment is reporting `healthy` (via the separate monitor in `web/src/server/fireworks-monitor/`). +3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. + +Users who cannot be admitted immediately are placed in a FIFO queue and given an estimated wait time. Admitted users get a fixed-length session (default 1h) during which they can make free-mode requests subject to the existing per-user rate limits. + +The entire system is gated by the env flag `FREEBUFF_WAITING_ROOM_ENABLED`. When `false`, the gate is a no-op and the admission ticker does not start; free-mode traffic flows through unchanged. + +## Kill Switch + +```bash +# Disable entirely (both the gate on chat/completions and the admission loop) +FREEBUFF_WAITING_ROOM_ENABLED=false + +# Other knobs (only read when enabled) +FREEBUFF_SESSION_LENGTH_MS=3600000 # 1 hour +FREEBUFF_MAX_CONCURRENT_SESSIONS=50 +``` + +Flipping the flag is safe at runtime: existing rows stay in the DB and will be admitted / expired correctly whenever the flag is flipped back on. + +## Architecture + +```mermaid +flowchart LR + CLI[freebuff CLI] + SessionAPI["/api/v1/freebuff/session
(GET, POST, DELETE)"] + ChatAPI["/api/v1/chat/completions"] + Gate[checkSessionAdmissible] + Ticker[Admission Ticker
every 5s, 1 pod] + Store[(free_session
Postgres)] + Monitor[FireworksMonitor
isFireworksAdmissible] + + CLI -- "POST on startup
(gets instance_id)" --> SessionAPI + CLI -- "GET to poll state" --> SessionAPI + CLI -- "chat requests
include instance_id" --> ChatAPI + SessionAPI --> Store + ChatAPI --> Gate + Gate --> Store + Ticker --> Store + Ticker --> Monitor +``` + +### Components + +- **`free_session` table** (Postgres) — single source of truth for queue + active-session state. One row per user (PK on `user_id`). +- **Public API** (`web/src/server/free-session/public-api.ts`) — `requestSession`, `getSessionState`, `endUserSession`, `checkSessionAdmissible`. Pure business logic; DI-friendly. +- **Store** (`web/src/server/free-session/store.ts`) — all DB ops. Transaction boundaries and advisory locks live here. +- **Admission ticker** (`web/src/server/free-session/admission.ts`) — self-scheduling timer that runs every 5s, sweeps expired rows, and admits queued users up to capacity. +- **HTTP routes** (`web/src/app/api/v1/freebuff/session/`) — thin wrappers that resolve the API key → `userId` and delegate to the public API. +- **Chat-completions gate** (`web/src/app/api/v1/chat/completions/_post.ts`) — for free-mode requests, calls `checkSessionAdmissible(userId, claimedInstanceId)` after the rate-limit check and rejects non-admissible requests with a structured error. + +## Database Schema + +```sql +CREATE TYPE free_session_status AS ENUM ('queued', 'active'); + +CREATE TABLE free_session ( + user_id text PRIMARY KEY REFERENCES "user"(id) ON DELETE CASCADE, + status free_session_status NOT NULL, + active_instance_id text NOT NULL, + queued_at timestamptz NOT NULL DEFAULT now(), + admitted_at timestamptz, + expires_at timestamptz, + created_at timestamptz NOT NULL DEFAULT now(), + updated_at timestamptz NOT NULL DEFAULT now() +); + +CREATE INDEX idx_free_session_queue ON free_session (status, queued_at); +CREATE INDEX idx_free_session_expiry ON free_session (expires_at); +``` + +Migration: `packages/internal/src/db/migrations/0043_vengeful_boomer.sql`. + +**Design notes** + +- **PK on `user_id`** is the structural enforcement of "one session per account". No app-logic race can produce two rows for one user. +- **`active_instance_id`** rotates on every `POST /session` call. This is how we enforce one-CLI-at-a-time (see [Single-instance enforcement](#single-instance-enforcement)). +- **All timestamps server-supplied.** The client never sends `queued_at`, `admitted_at`, or `expires_at` — they are either `DEFAULT now()` or computed server-side during admission. +- **FK CASCADE on user delete** keeps the table clean without a background job. + +## State Machine + +```mermaid +stateDiagram-v2 + [*] --> queued: POST /session
(first call) + queued --> active: admission tick
(capacity + healthy) + active --> expired: expires_at < now() + expired --> queued: POST /session
(re-queue at back) + queued --> [*]: DELETE /session + active --> [*]: DELETE /session
or admission sweep +``` + +There is no stored `expired` status. An `active` row whose `expires_at` is in the past is treated as expired by `checkSessionAdmissible` and swept by the admission ticker. + +## Single-instance Enforcement + +The challenge: a user running two CLIs on the same account should not get 2× throughput. + +The PK on `user_id` gives us one session row per user, but both CLIs could share that row and double up their request rate (bounded only by the per-user rate limiter, which isn't ideal). + +The solution: `active_instance_id`. + +1. On startup, the CLI calls `POST /api/v1/freebuff/session`. The server generates a fresh UUID (`active_instance_id`), stores it, and returns it. +2. Every subsequent chat request includes that id in `codebuff_metadata.freebuff_instance_id`. +3. `checkSessionAdmissible` rejects the request with `session_superseded` (HTTP 409) if the claimed id doesn't match the stored one. +4. When the user starts a second CLI, it calls `POST /session`, which rotates `active_instance_id`. The first CLI's subsequent request hits 409, so only the latest CLI can actually make chat requests. + +The rotation is important: it happens even if the caller is already in the `active` state, so a second CLI always wins. Any other design (first-wins, take-over-requires-force-flag) would allow the attacker to keep the old CLI alive forever. + +### What this does NOT prevent + +- A single user manually syncing `instance_id` between two CLIs (e.g. editing a config file). This is possible but requires them to re-sync after every startup call, so it's high-friction. We accept this. +- A user creating multiple accounts. That is covered by other gates (MIN_ACCOUNT_AGE_FOR_PAID_MS, geo check) and the Fireworks monitor's overall throttle. + +## Admission Loop + +One pod runs the admission loop at a time, coordinated via Postgres advisory lock. All pods start a ticker on boot, but each tick acquires `pg_try_advisory_xact_lock(FREEBUFF_ADMISSION_LOCK_ID)` inside a transaction; if already held, the tick is a no-op on that pod. The lock is automatically released when the transaction commits. + +Each tick does (in order): + +1. **Sweep expired.** `DELETE FROM free_session WHERE status='active' AND expires_at < now()`. Runs regardless of upstream health so zombie sessions are cleaned up even during an outage. +2. **Check upstream health.** `isFireworksAdmissible()` from the monitor. If not `healthy`, skip admission for this tick (queue grows; users see `status: 'queued'` with increasing position). +3. **Measure capacity.** `capacity = min(MAX_CONCURRENT - activeCount, MAX_ADMITS_PER_TICK)`. `MAX_ADMITS_PER_TICK=20` caps thundering-herd admission when a large block of sessions expires simultaneously. +4. **Admit.** `SELECT ... WHERE status='queued' ORDER BY queued_at, user_id LIMIT capacity FOR UPDATE SKIP LOCKED`, then `UPDATE` those rows to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. + +### Tunables + +| Constant | Location | Default | Purpose | +|---|---|---|---| +| `ADMISSION_TICK_MS` | `config.ts` | 5000 | How often the ticker fires | +| `MAX_ADMITS_PER_TICK` | `config.ts` | 20 | Upper bound on admits per tick | +| `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | +| `FREEBUFF_MAX_CONCURRENT_SESSIONS` | env | 50 | Global active-session cap | + +## HTTP API + +All endpoints authenticate via the standard `Authorization: Bearer ` or `x-codebuff-api-key` header. + +### `POST /api/v1/freebuff/session` + +**Called by the CLI on startup.** Idempotent. Semantics: + +- No existing row → create with `status='queued'`, fresh `active_instance_id`, `queued_at=now()`. +- Existing queued row → rotate `active_instance_id`, preserve `queued_at` (no queue jump). +- Existing active+unexpired row → rotate `active_instance_id`, preserve `status`/`admitted_at`/`expires_at`. +- Existing active+expired row → reset to queued with fresh `queued_at` (re-queue at back). + +Response shapes: + +```jsonc +// Waiting room disabled — CLI should treat this as "always admitted" +{ "status": "disabled" } + +// In queue +{ + "status": "queued", + "instanceId": "e47…", + "position": 17, // 1-indexed + "queueDepth": 43, + "estimatedWaitMs": 3600000, + "queuedAt": "2026-04-17T12:00:00Z" +} + +// Admitted +{ + "status": "active", + "instanceId": "e47…", + "admittedAt": "2026-04-17T12:00:00Z", + "expiresAt": "2026-04-17T13:00:00Z", + "remainingMs": 3600000 +} +``` + +### `GET /api/v1/freebuff/session` + +**Read-only polling.** Does not mutate `active_instance_id`. The CLI uses this to refresh the countdown / queue position. Returns the same shapes as POST, plus: + +```jsonc +// User has no row at all — must call POST first +{ "status": "none", "message": "Call POST to join the waiting room." } +``` + +### `DELETE /api/v1/freebuff/session` + +**End session immediately.** Deletes the row; the freed slot is picked up by the next admission tick. + +Response: `{ "status": "ended" }`. + +## Chat Completions Gate + +For free-mode requests (`codebuff_metadata.cost_mode === 'free'`), `_post.ts` calls `checkSessionAdmissible` after the per-user rate limiter and before the subscriber block-grant check. + +### Response codes + +| HTTP | `error` | When | +|---|---|---| +| 428 | `waiting_room_required` | No session row exists. Client should call POST /session. | +| 429 | `waiting_room_queued` | Row exists with `status='queued'`. Client should keep polling GET. | +| 409 | `session_superseded` | Claimed `instance_id` does not match stored one — another CLI took over. | +| 410 | `session_expired` | Row exists with `status='active'` but `expires_at < now()`. Client should POST /session to re-queue. | + +When the waiting room is disabled, the gate returns `{ ok: true, reason: 'disabled' }` without touching the DB. + +## Estimated Wait Time + +Computed in `session-view.ts` as an **upper bound** that assumes uniform session expiry: + +``` +waves = floor((position - 1) / maxConcurrent) +waitMs = waves * sessionLengthMs +``` + +- Position 1..`maxConcurrent` → 0 (next tick will admit them) +- Position `maxConcurrent`+1..`2*maxConcurrent` → one full session length +- and so on. + +Actual wait is usually shorter because users call `DELETE /session` on CLI exit and sessions turn over naturally. We show an upper bound because under-promising on wait time is better UX than surprise delays. + +## CLI Integration (frontend-side contract) + +Not implemented yet. When the CLI is updated, it should: + +1. **On startup**, call `POST /api/v1/freebuff/session`. Store `instanceId` in memory (not on disk — startup must re-admit). +2. **Loop while `status === 'queued'`:** poll `GET /api/v1/freebuff/session` every ~5s and render `position / queueDepth / estimatedWaitMs` to the user. +3. **When `status === 'active'`**, start rendering `remainingMs` as a countdown. Re-poll GET every ~30s to stay honest with server-side state. +4. **On every chat request**, include `codebuff_metadata.freebuff_instance_id: `. +5. **Handle gate errors:** + - `session_superseded` (409) → surface "another freebuff instance has taken over; exiting" and shut down. + - `session_expired` (410) → go back to step 1 (re-admit into queue). + - `waiting_room_queued` (429) → shouldn't happen under normal flow but recoverable by polling GET. + - `waiting_room_required` (428) → shouldn't happen either; call POST. +6. **On clean exit**, call `DELETE /api/v1/freebuff/session` so the next user can be admitted sooner. + +The `disabled` response means the server has the waiting room turned off. CLI should treat it identically to `active` with infinite remaining time — do not show a countdown, and include a dummy/empty `freebuff_instance_id` (the server ignores it). + +## Multi-pod Behavior + +- **`/api/v1/freebuff/session` routes** are stateless per pod; all state lives in Postgres. Any pod can serve any request. +- **Chat completions gate** is a single `SELECT` per free-mode request. At high QPS this is the hottest path — the `user_id` PK lookup is O(1). If it ever becomes a problem, the obvious fix is to cache the session row for ~1s per pod. +- **Admission loop** runs on every pod but is serialized by `pg_try_advisory_xact_lock`. At any given tick, exactly one pod actually admits; the rest early-return. + +## Abuse Resistance Summary + +| Attack | Mitigation | +|---|---| +| Multiple sessions per account | PK on `user_id` — structurally impossible | +| Multiple CLIs sharing one session | `active_instance_id` rotates on POST; stale id → 409 | +| Client-forged timestamps | All timestamps server-supplied (`DEFAULT now()` or explicit) | +| Queue jumping via timestamp manipulation | `queued_at` is server-supplied; FIFO order is server-determined | +| Repeatedly calling POST to reset queue position | POST preserves `queued_at` for already-queued users | +| Two pods admitting the same user | `SELECT ... FOR UPDATE SKIP LOCKED` + advisory xact lock | +| Spamming POST/GET to starve admission tick | Admission uses Postgres advisory lock; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. | +| Low-traffic error-fraction flapping blocking admissions | Health monitor has `minRequestRateForErrorCheck` floor (see `fireworks-monitor`) | +| Monitor down / metrics stale | `isFireworksAdmissible()` fails closed → admission pauses, queue grows | +| Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy | + +## Testing + +Pure logic covered by `web/src/server/free-session/__tests__/*.test.ts`: + +- `session-view.test.ts` — wait-time estimation, row→response mapping +- `public-api.test.ts` — all status transitions via in-memory DI store +- `admission.test.ts` — tick behaviour with mocked store + health checks + +Handler tests in `web/src/app/api/v1/freebuff/session/__tests__/session.test.ts` cover auth + request routing with a mocked `SessionDeps`. + +The real store (`store.ts`) and admission loop ticker (`admission.ts` — the scheduling wrapper around `runAdmissionTick`) are not directly unit-tested because they're thin glue over Postgres and `setTimeout`. Integration-level validation of the store requires a Postgres instance and is left for the e2e harness. + +## Known Gaps / Future Work + +- **No rate limit on `/session` itself.** A determined user could spam POST/GET. Current throughput is bounded by general per-IP limits upstream, but this should be tightened before large rollouts. +- **Estimated wait is coarse.** Could be improved by tracking actual admission rate over the last N minutes. +- **No admin UI.** To inspect queue depth, active count, or kick a user, you currently need DB access. A small admin endpoint under `/api/admin/freebuff/*` is a natural add. +- **No metrics exposure.** Consider emitting queue depth and active count to Prometheus / BigQuery. +- **Session length is global.** Per-user or per-tier session length would require a column on the row; currently all admitted users get the same lifetime. diff --git a/packages/internal/src/db/migrations/0043_vengeful_boomer.sql b/packages/internal/src/db/migrations/0043_vengeful_boomer.sql new file mode 100644 index 0000000000..d47a65099b --- /dev/null +++ b/packages/internal/src/db/migrations/0043_vengeful_boomer.sql @@ -0,0 +1,15 @@ +CREATE TYPE "public"."free_session_status" AS ENUM('queued', 'active');--> statement-breakpoint +CREATE TABLE "free_session" ( + "user_id" text PRIMARY KEY NOT NULL, + "status" "free_session_status" NOT NULL, + "active_instance_id" text NOT NULL, + "queued_at" timestamp with time zone DEFAULT now() NOT NULL, + "admitted_at" timestamp with time zone, + "expires_at" timestamp with time zone, + "created_at" timestamp with time zone DEFAULT now() NOT NULL, + "updated_at" timestamp with time zone DEFAULT now() NOT NULL +); +--> statement-breakpoint +ALTER TABLE "free_session" ADD CONSTRAINT "free_session_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint +CREATE INDEX "idx_free_session_queue" ON "free_session" USING btree ("status","queued_at");--> statement-breakpoint +CREATE INDEX "idx_free_session_expiry" ON "free_session" USING btree ("expires_at"); \ No newline at end of file diff --git a/packages/internal/src/db/migrations/meta/0043_snapshot.json b/packages/internal/src/db/migrations/meta/0043_snapshot.json new file mode 100644 index 0000000000..a3dfc20144 --- /dev/null +++ b/packages/internal/src/db/migrations/meta/0043_snapshot.json @@ -0,0 +1,3202 @@ +{ + "id": "7c9172ed-5f73-4bf8-93cc-2c7e6d82a9ad", + "prevId": "c7772899-6ae6-4a07-890e-a1ca64dc6e61", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.account": { + "name": "account", + "schema": "", + "columns": { + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "providerAccountId": { + "name": "providerAccountId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "refresh_token": { + "name": "refresh_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "access_token": { + "name": "access_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "token_type": { + "name": "token_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "id_token": { + "name": "id_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "session_state": { + "name": "session_state", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "account_userId_user_id_fk": { + "name": "account_userId_user_id_fk", + "tableFrom": "account", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "account_provider_providerAccountId_pk": { + "name": "account_provider_providerAccountId_pk", + "columns": [ + "provider", + "providerAccountId" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ad_impression": { + "name": "ad_impression", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ad_text": { + "name": "ad_text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "cta": { + "name": "cta", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "favicon": { + "name": "favicon", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "click_url": { + "name": "click_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "imp_url": { + "name": "imp_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "payout": { + "name": "payout", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true + }, + "credits_granted": { + "name": "credits_granted", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "grant_operation_id": { + "name": "grant_operation_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "served_at": { + "name": "served_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "impression_fired_at": { + "name": "impression_fired_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "clicked_at": { + "name": "clicked_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_ad_impression_user": { + "name": "idx_ad_impression_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "served_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ad_impression_imp_url": { + "name": "idx_ad_impression_imp_url", + "columns": [ + { + "expression": "imp_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "ad_impression_user_id_user_id_fk": { + "name": "ad_impression_user_id_user_id_fk", + "tableFrom": "ad_impression", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "ad_impression_imp_url_unique": { + "name": "ad_impression_imp_url_unique", + "nullsNotDistinct": false, + "columns": [ + "imp_url" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_config": { + "name": "agent_config", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "version": { + "name": "version", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "major": { + "name": "major", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 1) AS INTEGER)", + "type": "stored" + } + }, + "minor": { + "name": "minor", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 2) AS INTEGER)", + "type": "stored" + } + }, + "patch": { + "name": "patch", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 3) AS INTEGER)", + "type": "stored" + } + }, + "data": { + "name": "data", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_agent_config_publisher": { + "name": "idx_agent_config_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_config_publisher_id_publisher_id_fk": { + "name": "agent_config_publisher_id_publisher_id_fk", + "tableFrom": "agent_config", + "tableTo": "publisher", + "columnsFrom": [ + "publisher_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "agent_config_publisher_id_id_version_pk": { + "name": "agent_config_publisher_id_id_version_pk", + "columns": [ + "publisher_id", + "id", + "version" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_run": { + "name": "agent_run", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '/', 1)\n ELSE NULL\n END", + "type": "stored" + } + }, + "agent_name": { + "name": "agent_name", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(split_part(agent_id, '/', 2), '@', 1)\n ELSE agent_id\n END", + "type": "stored" + } + }, + "agent_version": { + "name": "agent_version", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '@', 2)\n ELSE NULL\n END", + "type": "stored" + } + }, + "ancestor_run_ids": { + "name": "ancestor_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "root_run_id": { + "name": "root_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[1] ELSE id END", + "type": "stored" + } + }, + "parent_run_id": { + "name": "parent_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[array_length(ancestor_run_ids, 1)] ELSE NULL END", + "type": "stored" + } + }, + "depth": { + "name": "depth", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "COALESCE(array_length(ancestor_run_ids, 1), 1)", + "type": "stored" + } + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "total_steps": { + "name": "total_steps", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "direct_credits": { + "name": "direct_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "total_credits": { + "name": "total_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "status": { + "name": "status", + "type": "agent_run_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'running'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_agent_run_user_id": { + "name": "idx_agent_run_user_id", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_parent": { + "name": "idx_agent_run_parent", + "columns": [ + { + "expression": "parent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_root": { + "name": "idx_agent_run_root", + "columns": [ + { + "expression": "root_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_agent_id": { + "name": "idx_agent_run_agent_id", + "columns": [ + { + "expression": "agent_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_publisher": { + "name": "idx_agent_run_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_status": { + "name": "idx_agent_run_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'running'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_ancestors_gin": { + "name": "idx_agent_run_ancestors_gin", + "columns": [ + { + "expression": "ancestor_run_ids", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + }, + "idx_agent_run_completed_publisher_agent": { + "name": "idx_agent_run_completed_publisher_agent", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_recent": { + "name": "idx_agent_run_completed_recent", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_version": { + "name": "idx_agent_run_completed_version", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_version", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_user": { + "name": "idx_agent_run_completed_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_run_user_id_user_id_fk": { + "name": "agent_run_user_id_user_id_fk", + "tableFrom": "agent_run", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_step": { + "name": "agent_step", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "agent_run_id": { + "name": "agent_run_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "step_number": { + "name": "step_number", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "credits": { + "name": "credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true, + "default": "'0'" + }, + "child_run_ids": { + "name": "child_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "spawned_count": { + "name": "spawned_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "array_length(child_run_ids, 1)", + "type": "stored" + } + }, + "message_id": { + "name": "message_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "agent_step_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'completed'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "unique_step_number_per_run": { + "name": "unique_step_number_per_run", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "step_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_run_id": { + "name": "idx_agent_step_run_id", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_children_gin": { + "name": "idx_agent_step_children_gin", + "columns": [ + { + "expression": "child_run_ids", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": { + "agent_step_agent_run_id_agent_run_id_fk": { + "name": "agent_step_agent_run_id_agent_run_id_fk", + "tableFrom": "agent_step", + "tableTo": "agent_run", + "columnsFrom": [ + "agent_run_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.credit_ledger": { + "name": "credit_ledger", + "schema": "", + "columns": { + "operation_id": { + "name": "operation_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "principal": { + "name": "principal", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "balance": { + "name": "balance", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "grant_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_credit_ledger_active_balance": { + "name": "idx_credit_ledger_active_balance", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "balance", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "priority", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"credit_ledger\".\"balance\" != 0 AND \"credit_ledger\".\"expires_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_org": { + "name": "idx_credit_ledger_org", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_subscription": { + "name": "idx_credit_ledger_subscription", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "credit_ledger_user_id_user_id_fk": { + "name": "credit_ledger_user_id_user_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "credit_ledger_org_id_org_id_fk": { + "name": "credit_ledger_org_id_org_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.encrypted_api_keys": { + "name": "encrypted_api_keys", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "api_key_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "api_key": { + "name": "api_key", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "encrypted_api_keys_user_id_user_id_fk": { + "name": "encrypted_api_keys_user_id_user_id_fk", + "tableFrom": "encrypted_api_keys", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "encrypted_api_keys_user_id_type_pk": { + "name": "encrypted_api_keys_user_id_type_pk", + "columns": [ + "user_id", + "type" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.fingerprint": { + "name": "fingerprint", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "sig_hash": { + "name": "sig_hash", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.free_session": { + "name": "free_session", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "status": { + "name": "status", + "type": "free_session_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "active_instance_id": { + "name": "active_instance_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "queued_at": { + "name": "queued_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "admitted_at": { + "name": "admitted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_free_session_queue": { + "name": "idx_free_session_queue", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "queued_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_free_session_expiry": { + "name": "idx_free_session_expiry", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "free_session_user_id_user_id_fk": { + "name": "free_session_user_id_user_id_fk", + "tableFrom": "free_session", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.git_eval_results": { + "name": "git_eval_results", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "cost_mode": { + "name": "cost_mode", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "reasoner_model": { + "name": "reasoner_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_model": { + "name": "agent_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "cost": { + "name": "cost", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "is_public": { + "name": "is_public", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.limit_override": { + "name": "limit_override", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "credits_per_block": { + "name": "credits_per_block", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "block_duration_hours": { + "name": "block_duration_hours", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "weekly_credit_limit": { + "name": "weekly_credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "limit_override_user_id_user_id_fk": { + "name": "limit_override_user_id_user_id_fk", + "tableFrom": "limit_override", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.message": { + "name": "message", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "finished_at": { + "name": "finished_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "client_id": { + "name": "client_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "client_request_id": { + "name": "client_request_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "request": { + "name": "request", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "last_message": { + "name": "last_message", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "\"message\".\"request\" -> -1", + "type": "stored" + } + }, + "reasoning_text": { + "name": "reasoning_text", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "response": { + "name": "response", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "input_tokens": { + "name": "input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "cache_creation_input_tokens": { + "name": "cache_creation_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "cache_read_input_tokens": { + "name": "cache_read_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "reasoning_tokens": { + "name": "reasoning_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "output_tokens": { + "name": "output_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cost": { + "name": "cost", + "type": "numeric(100, 20)", + "primaryKey": false, + "notNull": true + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "byok": { + "name": "byok", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "latency_ms": { + "name": "latency_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "ttft_ms": { + "name": "ttft_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "message_user_id_idx": { + "name": "message_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_finished_at_user_id_idx": { + "name": "message_finished_at_user_id_idx", + "columns": [ + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_idx": { + "name": "message_org_id_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_finished_at_idx": { + "name": "message_org_id_finished_at_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "message_user_id_user_id_fk": { + "name": "message_user_id_user_id_fk", + "tableFrom": "message", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "message_org_id_org_id_fk": { + "name": "message_org_id_org_id_fk", + "tableFrom": "message", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org": { + "name": "org", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "owner_id": { + "name": "owner_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "current_period_start": { + "name": "current_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "current_period_end": { + "name": "current_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": "auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "credit_limit": { + "name": "credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "billing_alerts": { + "name": "billing_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "usage_alerts": { + "name": "usage_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "weekly_reports": { + "name": "weekly_reports", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_owner_id_user_id_fk": { + "name": "org_owner_id_user_id_fk", + "tableFrom": "org", + "tableTo": "user", + "columnsFrom": [ + "owner_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_slug_unique": { + "name": "org_slug_unique", + "nullsNotDistinct": false, + "columns": [ + "slug" + ] + }, + "org_stripe_customer_id_unique": { + "name": "org_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_feature": { + "name": "org_feature", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "feature": { + "name": "feature", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "config": { + "name": "config", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_org_feature_active": { + "name": "idx_org_feature_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_feature_org_id_org_id_fk": { + "name": "org_feature_org_id_org_id_fk", + "tableFrom": "org_feature", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_feature_org_id_feature_pk": { + "name": "org_feature_org_id_feature_pk", + "columns": [ + "org_id", + "feature" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_invite": { + "name": "org_invite", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "invited_by": { + "name": "invited_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "accepted_at": { + "name": "accepted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "accepted_by": { + "name": "accepted_by", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_org_invite_token": { + "name": "idx_org_invite_token", + "columns": [ + { + "expression": "token", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_email": { + "name": "idx_org_invite_email", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_expires": { + "name": "idx_org_invite_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_invite_org_id_org_id_fk": { + "name": "org_invite_org_id_org_id_fk", + "tableFrom": "org_invite", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_invite_invited_by_user_id_fk": { + "name": "org_invite_invited_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "invited_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "org_invite_accepted_by_user_id_fk": { + "name": "org_invite_accepted_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "accepted_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_invite_token_unique": { + "name": "org_invite_token_unique", + "nullsNotDistinct": false, + "columns": [ + "token" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_member": { + "name": "org_member", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "joined_at": { + "name": "joined_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_member_org_id_org_id_fk": { + "name": "org_member_org_id_org_id_fk", + "tableFrom": "org_member", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_member_user_id_user_id_fk": { + "name": "org_member_user_id_user_id_fk", + "tableFrom": "org_member", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_member_org_id_user_id_pk": { + "name": "org_member_org_id_user_id_pk", + "columns": [ + "org_id", + "user_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_repo": { + "name": "org_repo", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_name": { + "name": "repo_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_owner": { + "name": "repo_owner", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "approved_by": { + "name": "approved_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "approved_at": { + "name": "approved_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + } + }, + "indexes": { + "idx_org_repo_active": { + "name": "idx_org_repo_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_repo_unique": { + "name": "idx_org_repo_unique", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "repo_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_repo_org_id_org_id_fk": { + "name": "org_repo_org_id_org_id_fk", + "tableFrom": "org_repo", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_repo_approved_by_user_id_fk": { + "name": "org_repo_approved_by_user_id_fk", + "tableFrom": "org_repo", + "tableTo": "user", + "columnsFrom": [ + "approved_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.publisher": { + "name": "publisher", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "verified": { + "name": "verified", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "bio": { + "name": "bio", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "avatar_url": { + "name": "avatar_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_by": { + "name": "created_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "publisher_user_id_user_id_fk": { + "name": "publisher_user_id_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "publisher_org_id_org_id_fk": { + "name": "publisher_org_id_org_id_fk", + "tableFrom": "publisher", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "publisher_created_by_user_id_fk": { + "name": "publisher_created_by_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "created_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": { + "publisher_single_owner": { + "name": "publisher_single_owner", + "value": "(\"publisher\".\"user_id\" IS NOT NULL AND \"publisher\".\"org_id\" IS NULL) OR\n (\"publisher\".\"user_id\" IS NULL AND \"publisher\".\"org_id\" IS NOT NULL)" + } + }, + "isRLSEnabled": false + }, + "public.referral": { + "name": "referral", + "schema": "", + "columns": { + "referrer_id": { + "name": "referrer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "referred_id": { + "name": "referred_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "referral_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'pending'" + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "is_legacy": { + "name": "is_legacy", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "referral_referrer_id_user_id_fk": { + "name": "referral_referrer_id_user_id_fk", + "tableFrom": "referral", + "tableTo": "user", + "columnsFrom": [ + "referrer_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "referral_referred_id_user_id_fk": { + "name": "referral_referred_id_user_id_fk", + "tableFrom": "referral", + "tableTo": "user", + "columnsFrom": [ + "referred_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "referral_referrer_id_referred_id_pk": { + "name": "referral_referrer_id_referred_id_pk", + "columns": [ + "referrer_id", + "referred_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.session": { + "name": "session", + "schema": "", + "columns": { + "sessionToken": { + "name": "sessionToken", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "fingerprint_id": { + "name": "fingerprint_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "session_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'web'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "session_userId_user_id_fk": { + "name": "session_userId_user_id_fk", + "tableFrom": "session", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "session_fingerprint_id_fingerprint_id_fk": { + "name": "session_fingerprint_id_fingerprint_id_fk", + "tableFrom": "session", + "tableTo": "fingerprint", + "columnsFrom": [ + "fingerprint_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.subscription": { + "name": "subscription", + "schema": "", + "columns": { + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_price_id": { + "name": "stripe_price_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "tier": { + "name": "tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "scheduled_tier": { + "name": "scheduled_tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "subscription_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'active'" + }, + "billing_period_start": { + "name": "billing_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "billing_period_end": { + "name": "billing_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "cancel_at_period_end": { + "name": "cancel_at_period_end", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "canceled_at": { + "name": "canceled_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_subscription_customer": { + "name": "idx_subscription_customer", + "columns": [ + { + "expression": "stripe_customer_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_user": { + "name": "idx_subscription_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_status": { + "name": "idx_subscription_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"subscription\".\"status\" = 'active'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "subscription_user_id_user_id_fk": { + "name": "subscription_user_id_user_id_fk", + "tableFrom": "subscription", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sync_failure": { + "name": "sync_failure", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_attempt_at": { + "name": "last_attempt_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "retry_count": { + "name": "retry_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "last_error": { + "name": "last_error", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "idx_sync_failure_retry": { + "name": "idx_sync_failure_retry", + "columns": [ + { + "expression": "retry_count", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "last_attempt_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"sync_failure\".\"retry_count\" < 5", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.user": { + "name": "user", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "password": { + "name": "password", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "emailVerified": { + "name": "emailVerified", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "image": { + "name": "image", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "next_quota_reset": { + "name": "next_quota_reset", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "default": "now() + INTERVAL '1 month'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "referral_code": { + "name": "referral_code", + "type": "text", + "primaryKey": false, + "notNull": false, + "default": "'ref-' || gen_random_uuid()" + }, + "referral_limit": { + "name": "referral_limit", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 5 + }, + "discord_id": { + "name": "discord_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "handle": { + "name": "handle", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": "auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "banned": { + "name": "banned", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "fallback_to_a_la_carte": { + "name": "fallback_to_a_la_carte", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "user_email_unique": { + "name": "user_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + }, + "user_stripe_customer_id_unique": { + "name": "user_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + }, + "user_referral_code_unique": { + "name": "user_referral_code_unique", + "nullsNotDistinct": false, + "columns": [ + "referral_code" + ] + }, + "user_discord_id_unique": { + "name": "user_discord_id_unique", + "nullsNotDistinct": false, + "columns": [ + "discord_id" + ] + }, + "user_handle_unique": { + "name": "user_handle_unique", + "nullsNotDistinct": false, + "columns": [ + "handle" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.verificationToken": { + "name": "verificationToken", + "schema": "", + "columns": { + "identifier": { + "name": "identifier", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "verificationToken_identifier_token_pk": { + "name": "verificationToken_identifier_token_pk", + "columns": [ + "identifier", + "token" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": { + "public.referral_status": { + "name": "referral_status", + "schema": "public", + "values": [ + "pending", + "completed" + ] + }, + "public.agent_run_status": { + "name": "agent_run_status", + "schema": "public", + "values": [ + "running", + "completed", + "failed", + "cancelled" + ] + }, + "public.agent_step_status": { + "name": "agent_step_status", + "schema": "public", + "values": [ + "running", + "completed", + "skipped" + ] + }, + "public.api_key_type": { + "name": "api_key_type", + "schema": "public", + "values": [ + "anthropic", + "gemini", + "openai" + ] + }, + "public.free_session_status": { + "name": "free_session_status", + "schema": "public", + "values": [ + "queued", + "active" + ] + }, + "public.grant_type": { + "name": "grant_type", + "schema": "public", + "values": [ + "free", + "referral", + "referral_legacy", + "subscription", + "purchase", + "admin", + "organization", + "ad" + ] + }, + "public.org_role": { + "name": "org_role", + "schema": "public", + "values": [ + "owner", + "admin", + "member" + ] + }, + "public.session_type": { + "name": "session_type", + "schema": "public", + "values": [ + "web", + "pat", + "cli" + ] + }, + "public.subscription_status": { + "name": "subscription_status", + "schema": "public", + "values": [ + "incomplete", + "incomplete_expired", + "trialing", + "active", + "past_due", + "canceled", + "unpaid", + "paused" + ] + } + }, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/web/instrumentation.ts b/web/instrumentation.ts index b38ccc27f3..6dbcf3eaa5 100644 --- a/web/instrumentation.ts +++ b/web/instrumentation.ts @@ -11,7 +11,7 @@ import { startFireworksMonitor } from '@/server/fireworks-monitor/monitor' import { logger } from '@/util/logger' -export function register() { +export async function register() { // Handle unhandled promise rejections (async errors that aren't caught) process.on( 'unhandledRejection', @@ -48,4 +48,14 @@ export function register() { logger.info({}, '[Instrumentation] Global error handlers registered') startFireworksMonitor() + + // DB-touching admission module uses `postgres`, which imports Node built-ins + // like `crypto`. Gate on NEXT_RUNTIME so the edge bundle doesn't try to + // resolve them. + if (process.env.NEXT_RUNTIME === 'nodejs') { + const { startFreeSessionAdmission } = await import( + '@/server/free-session/admission' + ) + startFreeSessionAdmission() + } } diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index ea74ad2569..0dddb5949e 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -70,6 +70,12 @@ describe('/api/v1/chat/completions POST endpoint', () => { let mockInsertMessageBigquery: InsertMessageBigqueryFn let nextQuotaReset: string + // Bypasses the freebuff waiting-room gate in tests that exercise free-mode + // flow without seeding a session. Matches the real return for the disabled + // path so downstream logic proceeds normally. + const mockCheckSessionAdmissibleAllow = async () => + ({ ok: true, reason: 'disabled' } as const) + beforeEach(() => { nextQuotaReset = new Date( Date.now() + 3 * 24 * 60 * 60 * 1000 + 5 * 60 * 1000, @@ -238,6 +244,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: globalThis.fetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(401) @@ -265,6 +272,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(401) @@ -294,6 +302,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(400) @@ -321,6 +330,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(400) @@ -351,6 +361,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(400) @@ -383,6 +394,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(400) @@ -417,6 +429,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(403) @@ -451,6 +464,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(402) @@ -487,6 +501,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(403) @@ -524,6 +539,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(200) @@ -557,6 +573,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(200) @@ -698,6 +715,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(403) @@ -734,6 +752,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) if (response.status !== 200) { @@ -774,6 +793,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(200) @@ -824,6 +844,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { loggerWithContext: mockLoggerWithContext, ensureSubscriberBlockGrant: mockEnsureSubscriberBlockGrant, getUserPreferences: mockGetUserPreferences, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(429) @@ -874,6 +895,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { loggerWithContext: mockLoggerWithContext, ensureSubscriberBlockGrant: mockEnsureSubscriberBlockGrant, getUserPreferences: mockGetUserPreferences, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(200) @@ -903,6 +925,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { loggerWithContext: mockLoggerWithContext, ensureSubscriberBlockGrant: mockEnsureSubscriberBlockGrant, getUserPreferences: mockGetUserPreferences, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(429) @@ -936,6 +959,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { loggerWithContext: mockLoggerWithContext, ensureSubscriberBlockGrant: mockEnsureSubscriberBlockGrant, getUserPreferences: mockGetUserPreferences, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(200) @@ -966,6 +990,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { loggerWithContext: mockLoggerWithContext, ensureSubscriberBlockGrant: mockEnsureSubscriberBlockGrant, getUserPreferences: mockGetUserPreferences, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(200) @@ -993,6 +1018,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { loggerWithContext: mockLoggerWithContext, ensureSubscriberBlockGrant: mockEnsureSubscriberBlockGrant, getUserPreferences: mockGetUserPreferences, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) // Should continue processing (fail open) @@ -1000,7 +1026,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(mockLogger.error).toHaveBeenCalled() }) - it('continues when user is not a subscriber (null result)', async () => { + it.skip('continues when user is not a subscriber (null result)', async () => { const mockEnsureSubscriberBlockGrant = mock(async () => null) const mockGetUserPreferences: GetUserPreferencesFn = mock(async () => ({ fallbackToALaCarte: false, @@ -1018,6 +1044,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { loggerWithContext: mockLoggerWithContext, ensureSubscriberBlockGrant: mockEnsureSubscriberBlockGrant, getUserPreferences: mockGetUserPreferences, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(200) @@ -1025,7 +1052,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(mockGetUserPreferences).not.toHaveBeenCalled() }) - it('defaults to allowing fallback when getUserPreferences is not provided', async () => { + it.skip('defaults to allowing fallback when getUserPreferences is not provided', async () => { const weeklyLimitError: BlockGrantResult = { error: 'weekly_limit_reached', used: 3500, diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 93e052e4b6..4dfc69e133 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -67,6 +67,7 @@ import { handleOpenRouterStream, OpenRouterError, } from '@/llm-api/openrouter' +import { checkSessionAdmissible } from '@/server/free-session/public-api' import { extractApiKeyFromHeader } from '@/util/auth' import { withDefaultProperties } from '@codebuff/common/analytics' import { checkFreeModeRateLimit } from './free-mode-rate-limiter' @@ -143,6 +144,8 @@ export const formatQuotaResetCountdown = ( return `in ${pluralize(minutes, 'minute')}` } +export type CheckSessionAdmissibleFn = typeof checkSessionAdmissible + export async function postChatCompletions(params: { req: NextRequest getUserInfoFromApiKey: GetUserInfoFromApiKeyFn @@ -155,6 +158,9 @@ export async function postChatCompletions(params: { insertMessageBigquery: InsertMessageBigqueryFn ensureSubscriberBlockGrant?: (params: { userId: string; logger: Logger }) => Promise getUserPreferences?: GetUserPreferencesFn + /** Optional override for the freebuff waiting-room gate. Defaults to the + * real check backed by Postgres; tests inject a no-op. */ + checkSessionAdmissible?: CheckSessionAdmissibleFn }) { const { req, @@ -166,6 +172,7 @@ export async function postChatCompletions(params: { insertMessageBigquery, ensureSubscriberBlockGrant, getUserPreferences, + checkSessionAdmissible: checkSession = checkSessionAdmissible, } = params let { logger } = params let { trackEvent } = params @@ -394,6 +401,36 @@ export async function postChatCompletions(params: { ) } + // Freebuff waiting-room gate. Only enforced for free-mode requests, and + // only when FREEBUFF_WAITING_ROOM_ENABLED=true — otherwise this is a + // no-op that returns { ok: true, reason: 'disabled' } without a DB hit. + // Runs before the rate limiter so rejected requests don't burn a queued + // user's free-mode counters. + if (isFreeModeRequest) { + const gate = await checkSession({ + userId, + claimedInstanceId: typedBody.codebuff_metadata?.freebuff_instance_id, + }) + if (!gate.ok) { + trackEvent({ + event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR, + userId, + properties: { error: gate.code }, + logger, + }) + const statusByCode: Record = { + waiting_room_required: 428, + waiting_room_queued: 429, + session_superseded: 409, + session_expired: 410, + } + return NextResponse.json( + { error: gate.code, message: gate.message }, + { status: statusByCode[gate.code] ?? 429 }, + ) + } + } + // Rate limit free mode requests (after validation so invalid requests don't consume quota) if (isFreeModeRequest) { const rateLimitResult = checkFreeModeRateLimit(userId) diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts new file mode 100644 index 0000000000..226a2a0a5e --- /dev/null +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -0,0 +1,131 @@ +import { describe, expect, test } from 'bun:test' + +import { + deleteFreebuffSession, + getFreebuffSession, + postFreebuffSession, +} from '../_handlers' + +import type { FreebuffSessionDeps } from '../_handlers' +import type { SessionDeps } from '@/server/free-session/public-api' +import type { InternalSessionRow } from '@/server/free-session/types' +import type { NextRequest } from 'next/server' + +function makeReq(apiKey: string | null): NextRequest { + const headers = new Headers() + if (apiKey) headers.set('Authorization', `Bearer ${apiKey}`) + return { + headers, + } as unknown as NextRequest +} + +function makeSessionDeps(overrides: Partial = {}): SessionDeps & { + rows: Map +} { + const rows = new Map() + const now = new Date('2026-04-17T12:00:00Z') + let instanceCounter = 0 + return { + rows, + isWaitingRoomEnabled: () => true, + getMaxConcurrentSessions: () => 10, + getSessionLengthMs: () => 60 * 60_000, + now: () => now, + getSessionRow: async (userId) => rows.get(userId) ?? null, + queueDepth: async () => [...rows.values()].filter((r) => r.status === 'queued').length, + queuePositionFor: async () => 1, + endSession: async (userId) => { + rows.delete(userId) + }, + joinOrTakeOver: async ({ userId, now }) => { + const r: InternalSessionRow = { + user_id: userId, + status: 'queued', + active_instance_id: `inst-${++instanceCounter}`, + queued_at: now, + admitted_at: null, + expires_at: null, + created_at: now, + updated_at: now, + } + rows.set(userId, r) + return r + }, + ...overrides, + } +} + +const LOGGER = { + info: () => {}, + warn: () => {}, + error: () => {}, + debug: () => {}, +} + +function makeDeps(sessionDeps: SessionDeps, userId: string | null): FreebuffSessionDeps { + return { + logger: LOGGER as unknown as FreebuffSessionDeps['logger'], + getUserInfoFromApiKey: (async () => (userId ? { id: userId } : undefined)) as unknown as FreebuffSessionDeps['getUserInfoFromApiKey'], + sessionDeps, + } +} + +describe('POST /api/v1/freebuff/session', () => { + test('401 when Authorization header is missing', async () => { + const sessionDeps = makeSessionDeps() + const resp = await postFreebuffSession(makeReq(null), makeDeps(sessionDeps, null)) + expect(resp.status).toBe(401) + }) + + test('401 when API key is invalid', async () => { + const sessionDeps = makeSessionDeps() + const resp = await postFreebuffSession(makeReq('bad'), makeDeps(sessionDeps, null)) + expect(resp.status).toBe(401) + }) + + test('creates a queued session for authed user', async () => { + const sessionDeps = makeSessionDeps() + const resp = await postFreebuffSession(makeReq('ok'), makeDeps(sessionDeps, 'u1')) + expect(resp.status).toBe(200) + const body = await resp.json() + expect(body.status).toBe('queued') + expect(body.instanceId).toBe('inst-1') + }) + + test('returns disabled when waiting room flag is off', async () => { + const sessionDeps = makeSessionDeps({ isWaitingRoomEnabled: () => false }) + const resp = await postFreebuffSession(makeReq('ok'), makeDeps(sessionDeps, 'u1')) + const body = await resp.json() + expect(body.status).toBe('disabled') + }) +}) + +describe('GET /api/v1/freebuff/session', () => { + test('returns { status: none } when user has no session', async () => { + const sessionDeps = makeSessionDeps() + const resp = await getFreebuffSession(makeReq('ok'), makeDeps(sessionDeps, 'u1')) + expect(resp.status).toBe(200) + const body = await resp.json() + expect(body.status).toBe('none') + }) +}) + +describe('DELETE /api/v1/freebuff/session', () => { + test('ends the session', async () => { + const sessionDeps = makeSessionDeps() + // Pre-seed a row + sessionDeps.rows.set('u1', { + user_id: 'u1', + status: 'active', + active_instance_id: 'x', + queued_at: new Date(), + admitted_at: new Date(), + expires_at: new Date(Date.now() + 60_000), + created_at: new Date(), + updated_at: new Date(), + }) + const resp = await deleteFreebuffSession(makeReq('ok'), makeDeps(sessionDeps, 'u1')) + expect(resp.status).toBe(200) + expect(sessionDeps.rows.has('u1')).toBe(false) + }) +}) diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts new file mode 100644 index 0000000000..a06ec19bc4 --- /dev/null +++ b/web/src/app/api/v1/freebuff/session/_handlers.ts @@ -0,0 +1,98 @@ +import { NextResponse } from 'next/server' + +import { + endUserSession, + getSessionState, + requestSession, +} from '@/server/free-session/public-api' +import { extractApiKeyFromHeader } from '@/util/auth' + +import type { SessionDeps } from '@/server/free-session/public-api' +import type { GetUserInfoFromApiKeyFn } from '@codebuff/common/types/contracts/database' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { NextRequest } from 'next/server' + +export interface FreebuffSessionDeps { + getUserInfoFromApiKey: GetUserInfoFromApiKeyFn + logger: Logger + sessionDeps?: SessionDeps +} + +type AuthResult = { error: NextResponse } | { userId: string } + +async function resolveUser(req: NextRequest, deps: FreebuffSessionDeps): Promise { + const apiKey = extractApiKeyFromHeader(req) + if (!apiKey) { + return { + error: NextResponse.json( + { + error: 'unauthorized', + message: 'Missing or invalid Authorization header', + }, + { status: 401 }, + ), + } + } + const userInfo = await deps.getUserInfoFromApiKey({ + apiKey, + fields: ['id'], + logger: deps.logger, + }) + if (!userInfo?.id) { + return { + error: NextResponse.json( + { error: 'unauthorized', message: 'Invalid API key' }, + { status: 401 }, + ), + } + } + return { userId: String(userInfo.id) } +} + +/** POST /api/v1/freebuff/session — join queue / take over as this instance. */ +export async function postFreebuffSession( + req: NextRequest, + deps: FreebuffSessionDeps, +): Promise { + const auth = await resolveUser(req, deps) + if ('error' in auth) return auth.error + + const state = await requestSession({ + userId: auth.userId, + deps: deps.sessionDeps, + }) + return NextResponse.json(state, { status: 200 }) +} + +/** GET /api/v1/freebuff/session — read current state without mutation. */ +export async function getFreebuffSession( + req: NextRequest, + deps: FreebuffSessionDeps, +): Promise { + const auth = await resolveUser(req, deps) + if ('error' in auth) return auth.error + + const state = await getSessionState({ + userId: auth.userId, + deps: deps.sessionDeps, + }) + if (!state) { + return NextResponse.json( + { status: 'none', message: 'Call POST to join the waiting room.' }, + { status: 200 }, + ) + } + return NextResponse.json(state, { status: 200 }) +} + +/** DELETE /api/v1/freebuff/session — end session / leave queue immediately. */ +export async function deleteFreebuffSession( + req: NextRequest, + deps: FreebuffSessionDeps, +): Promise { + const auth = await resolveUser(req, deps) + if ('error' in auth) return auth.error + + await endUserSession({ userId: auth.userId, deps: deps.sessionDeps }) + return NextResponse.json({ status: 'ended' }, { status: 200 }) +} diff --git a/web/src/app/api/v1/freebuff/session/route.ts b/web/src/app/api/v1/freebuff/session/route.ts new file mode 100644 index 0000000000..cf5802afdb --- /dev/null +++ b/web/src/app/api/v1/freebuff/session/route.ts @@ -0,0 +1,22 @@ +import { + deleteFreebuffSession, + getFreebuffSession, + postFreebuffSession, +} from './_handlers' + +import { getUserInfoFromApiKey } from '@/db/user' +import { logger } from '@/util/logger' + +import type { NextRequest } from 'next/server' + +export async function GET(req: NextRequest) { + return getFreebuffSession(req, { getUserInfoFromApiKey, logger }) +} + +export async function POST(req: NextRequest) { + return postFreebuffSession(req, { getUserInfoFromApiKey, logger }) +} + +export async function DELETE(req: NextRequest) { + return deleteFreebuffSession(req, { getUserInfoFromApiKey, logger }) +} diff --git a/web/src/llm-api/types.ts b/web/src/llm-api/types.ts index 82cf7632cd..b3bb1eaf97 100644 --- a/web/src/llm-api/types.ts +++ b/web/src/llm-api/types.ts @@ -6,6 +6,11 @@ export interface CodebuffMetadata { run_id?: string n?: number cost_mode?: string + /** Server-issued session instance id (see /api/v1/freebuff/session). Required + * on free-mode requests when the waiting room is enabled; stale values are + * rejected so a second CLI on the same account cannot keep serving traffic + * after the first one re-admitted. */ + freebuff_instance_id?: string } export interface ChatMessage { @@ -77,7 +82,9 @@ export function isCodebuffMetadata( (v.client_id === undefined || typeof v.client_id === 'string') && (v.run_id === undefined || typeof v.run_id === 'string') && (v.n === undefined || typeof v.n === 'number') && - (v.cost_mode === undefined || typeof v.cost_mode === 'string') + (v.cost_mode === undefined || typeof v.cost_mode === 'string') && + (v.freebuff_instance_id === undefined || + typeof v.freebuff_instance_id === 'string') ) } diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts new file mode 100644 index 0000000000..613aeeadd6 --- /dev/null +++ b/web/src/server/free-session/__tests__/admission.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, test } from 'bun:test' + +import { runAdmissionTick } from '../admission' + +import type { AdmissionDeps } from '../admission' + +const NOW = new Date('2026-04-17T12:00:00Z') + +function makeAdmissionDeps(overrides: Partial = {}): AdmissionDeps & { + calls: { admit: number[]; expired: number; active: number } +} { + const calls = { admit: [] as number[], expired: 0, active: 0 } + return { + calls, + sweepExpired: async () => 0, + countActive: async () => 0, + queueDepth: async () => 0, + admitFromQueue: async ({ limit }) => { + calls.admit.push(limit) + return Array.from({ length: limit }, (_, i) => ({ user_id: `u${i}` })) + }, + isFireworksAdmissible: () => true, + getMaxConcurrentSessions: () => 10, + getSessionLengthMs: () => 60 * 60 * 1000, + now: () => NOW, + ...overrides, + } +} + +describe('runAdmissionTick', () => { + test('admits up to (max - active) when healthy', async () => { + const deps = makeAdmissionDeps({ + countActive: async () => 3, + getMaxConcurrentSessions: () => 10, + }) + const result = await runAdmissionTick(deps) + expect(result.admitted).toBe(7) + expect(result.skipped).toBeNull() + }) + + test('caps admits per tick at MAX_ADMITS_PER_TICK', async () => { + const deps = makeAdmissionDeps({ + countActive: async () => 0, + getMaxConcurrentSessions: () => 1000, + }) + const result = await runAdmissionTick(deps) + expect(result.admitted).toBe(20) + }) + + test('skips admission when Fireworks not healthy', async () => { + const deps = makeAdmissionDeps({ + isFireworksAdmissible: () => false, + countActive: async () => 0, + }) + const result = await runAdmissionTick(deps) + expect(result.admitted).toBe(0) + expect(result.skipped).toBe('health') + }) + + test('skips when at capacity', async () => { + const deps = makeAdmissionDeps({ + countActive: async () => 10, + getMaxConcurrentSessions: () => 10, + }) + const result = await runAdmissionTick(deps) + expect(result.admitted).toBe(0) + expect(result.skipped).toBe('full') + }) + + test('sweeps expired sessions even when skipping admission', async () => { + let swept = 0 + const deps = makeAdmissionDeps({ + sweepExpired: async () => { + swept = 3 + return 3 + }, + isFireworksAdmissible: () => false, + }) + const result = await runAdmissionTick(deps) + expect(swept).toBe(3) + expect(result.expired).toBe(3) + }) + + test('propagates expiry count and admit count together', async () => { + const deps = makeAdmissionDeps({ + sweepExpired: async () => 2, + countActive: async () => 5, + getMaxConcurrentSessions: () => 8, + }) + const result = await runAdmissionTick(deps) + expect(result.expired).toBe(2) + expect(result.admitted).toBe(3) + }) +}) diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts new file mode 100644 index 0000000000..e7ba5ee9c0 --- /dev/null +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -0,0 +1,293 @@ +import { beforeEach, describe, expect, test } from 'bun:test' + +import { + checkSessionAdmissible, + endUserSession, + getSessionState, + requestSession, +} from '../public-api' + +import type { SessionDeps } from '../public-api' +import type { InternalSessionRow } from '../types' + +const SESSION_LEN = 60 * 60 * 1000 +const MAX_CONC = 10 + +function makeDeps(overrides: Partial = {}): SessionDeps & { + rows: Map + _tick: (n: Date) => void + _now: () => Date +} { + const rows = new Map() + let currentNow = new Date('2026-04-17T12:00:00Z') + let instanceCounter = 0 + + const newInstanceId = () => `inst-${++instanceCounter}` + + const deps: SessionDeps & { + rows: Map + _tick: (n: Date) => void + _now: () => Date + } = { + rows, + _tick: (n: Date) => { + currentNow = n + }, + _now: () => currentNow, + isWaitingRoomEnabled: () => true, + getMaxConcurrentSessions: () => MAX_CONC, + getSessionLengthMs: () => SESSION_LEN, + now: () => currentNow, + getSessionRow: async (userId) => rows.get(userId) ?? null, + endSession: async (userId) => { + rows.delete(userId) + }, + queueDepth: async () => { + let n = 0 + for (const r of rows.values()) if (r.status === 'queued') n++ + return n + }, + queuePositionFor: async ({ userId, queuedAt }) => { + let pos = 0 + for (const r of rows.values()) { + if (r.status !== 'queued') continue + if ( + r.queued_at.getTime() < queuedAt.getTime() || + (r.queued_at.getTime() === queuedAt.getTime() && r.user_id <= userId) + ) { + pos++ + } + } + return pos + }, + joinOrTakeOver: async ({ userId, now }) => { + const existing = rows.get(userId) + const nextInstance = newInstanceId() + if (!existing) { + const r: InternalSessionRow = { + user_id: userId, + status: 'queued', + active_instance_id: nextInstance, + queued_at: now, + admitted_at: null, + expires_at: null, + created_at: now, + updated_at: now, + } + rows.set(userId, r) + return r + } + if ( + existing.status === 'active' && + existing.expires_at && + existing.expires_at.getTime() > now.getTime() + ) { + existing.active_instance_id = nextInstance + existing.updated_at = now + return existing + } + if (existing.status === 'queued') { + existing.active_instance_id = nextInstance + existing.updated_at = now + return existing + } + existing.status = 'queued' + existing.active_instance_id = nextInstance + existing.queued_at = now + existing.admitted_at = null + existing.expires_at = null + existing.updated_at = now + return existing + }, + ...overrides, + } + return deps +} + +describe('requestSession', () => { + let deps: ReturnType + beforeEach(() => { + deps = makeDeps() + }) + + test('disabled flag returns { status: disabled } and does not touch DB', async () => { + deps.isWaitingRoomEnabled = () => true // sanity + const offDeps = makeDeps({ isWaitingRoomEnabled: () => false }) + const state = await requestSession({ userId: 'u1', deps: offDeps }) + expect(state).toEqual({ status: 'disabled' }) + expect(offDeps.rows.size).toBe(0) + }) + + test('first call puts user in queue at position 1', async () => { + const state = await requestSession({ userId: 'u1', deps }) + expect(state.status).toBe('queued') + if (state.status !== 'queued') throw new Error('unreachable') + expect(state.position).toBe(1) + expect(state.queueDepth).toBe(1) + expect(state.instanceId).toBe('inst-1') + }) + + test('second call from same user rotates instance id, keeps queue position', async () => { + await requestSession({ userId: 'u1', deps }) + const second = await requestSession({ userId: 'u1', deps }) + if (second.status !== 'queued') throw new Error('unreachable') + expect(second.position).toBe(1) + expect(second.instanceId).toBe('inst-2') + }) + + test('multiple users queue in FIFO order', async () => { + await requestSession({ userId: 'u1', deps }) + deps._tick(new Date(deps._now().getTime() + 1000)) + await requestSession({ userId: 'u2', deps }) + + const s1 = (await getSessionState({ userId: 'u1', deps }))! + const s2 = (await getSessionState({ userId: 'u2', deps }))! + if (s1.status !== 'queued' || s2.status !== 'queued') throw new Error('unreachable') + expect(s1.position).toBe(1) + expect(s2.position).toBe(2) + }) + + test('active unexpired session → rotate instance id, preserve active state', async () => { + // Prime a user into active state manually. + await requestSession({ userId: 'u1', deps }) + const row = deps.rows.get('u1')! + row.status = 'active' + row.admitted_at = deps._now() + row.expires_at = new Date(deps._now().getTime() + SESSION_LEN) + + const second = await requestSession({ userId: 'u1', deps }) + expect(second.status).toBe('active') + if (second.status !== 'active') throw new Error('unreachable') + expect(second.instanceId).not.toBe('inst-1') // rotated + }) +}) + +describe('checkSessionAdmissible', () => { + let deps: ReturnType + beforeEach(() => { + deps = makeDeps() + }) + + test('disabled flag → ok with reason=disabled', async () => { + const offDeps = makeDeps({ isWaitingRoomEnabled: () => false }) + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: undefined, + deps: offDeps, + }) + expect(result.ok).toBe(true) + }) + + test('no session → waiting_room_required', async () => { + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: 'x', + deps, + }) + expect(result.ok).toBe(false) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('waiting_room_required') + }) + + test('queued session → waiting_room_queued', async () => { + await requestSession({ userId: 'u1', deps }) + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: 'inst-1', + deps, + }) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('waiting_room_queued') + }) + + test('active + matching instance id → ok', async () => { + await requestSession({ userId: 'u1', deps }) + const row = deps.rows.get('u1')! + row.status = 'active' + row.admitted_at = deps._now() + row.expires_at = new Date(deps._now().getTime() + SESSION_LEN) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: row.active_instance_id, + deps, + }) + expect(result.ok).toBe(true) + if (!result.ok || result.reason !== 'active') throw new Error('unreachable') + expect(result.remainingMs).toBe(SESSION_LEN) + }) + + test('active + wrong instance id → session_superseded', async () => { + await requestSession({ userId: 'u1', deps }) + const row = deps.rows.get('u1')! + row.status = 'active' + row.admitted_at = deps._now() + row.expires_at = new Date(deps._now().getTime() + SESSION_LEN) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: 'stale-token', + deps, + }) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('session_superseded') + }) + + test('active + missing instance id → session_superseded (fails closed)', async () => { + await requestSession({ userId: 'u1', deps }) + const row = deps.rows.get('u1')! + row.status = 'active' + row.admitted_at = deps._now() + row.expires_at = new Date(deps._now().getTime() + SESSION_LEN) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: undefined, + deps, + }) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('session_superseded') + }) + + test('active but expires_at in the past → session_expired', async () => { + await requestSession({ userId: 'u1', deps }) + const row = deps.rows.get('u1')! + row.status = 'active' + row.admitted_at = new Date(deps._now().getTime() - 2 * SESSION_LEN) + row.expires_at = new Date(deps._now().getTime() - 1) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: row.active_instance_id, + deps, + }) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('session_expired') + }) +}) + +describe('endUserSession', () => { + test('removes row', async () => { + const deps = makeDeps() + await requestSession({ userId: 'u1', deps }) + expect(deps.rows.has('u1')).toBe(true) + await endUserSession({ userId: 'u1', deps }) + expect(deps.rows.has('u1')).toBe(false) + }) + + test('is no-op when disabled', async () => { + const deps = makeDeps({ isWaitingRoomEnabled: () => false }) + deps.rows.set('u1', { + user_id: 'u1', + status: 'active', + active_instance_id: 'x', + queued_at: new Date(), + admitted_at: null, + expires_at: null, + created_at: new Date(), + updated_at: new Date(), + }) + await endUserSession({ userId: 'u1', deps }) + expect(deps.rows.has('u1')).toBe(true) + }) +}) diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts new file mode 100644 index 0000000000..fa5f891ab8 --- /dev/null +++ b/web/src/server/free-session/__tests__/session-view.test.ts @@ -0,0 +1,110 @@ +import { describe, expect, test } from 'bun:test' + +import { estimateWaitMs, toSessionStateResponse } from '../session-view' + +import type { InternalSessionRow } from '../types' + +const SESSION_LEN = 60 * 60 * 1000 +const MAX_CONC = 50 + +function row(overrides: Partial = {}): InternalSessionRow { + const now = new Date('2026-04-17T12:00:00Z') + return { + user_id: 'u1', + status: 'queued', + active_instance_id: 'inst-1', + queued_at: now, + admitted_at: null, + expires_at: null, + created_at: now, + updated_at: now, + ...overrides, + } +} + +describe('estimateWaitMs', () => { + test('position <= capacity → 0 wait', () => { + expect(estimateWaitMs({ position: 1, maxConcurrent: MAX_CONC, sessionLengthMs: SESSION_LEN })).toBe(0) + expect(estimateWaitMs({ position: MAX_CONC, maxConcurrent: MAX_CONC, sessionLengthMs: SESSION_LEN })).toBe(0) + }) + + test('position in second wave → one full session length', () => { + expect(estimateWaitMs({ position: MAX_CONC + 1, maxConcurrent: MAX_CONC, sessionLengthMs: SESSION_LEN })).toBe(SESSION_LEN) + }) + + test('position in third wave → two full session lengths', () => { + expect(estimateWaitMs({ position: 2 * MAX_CONC + 1, maxConcurrent: MAX_CONC, sessionLengthMs: SESSION_LEN })).toBe(2 * SESSION_LEN) + }) + + test('degenerate inputs return 0', () => { + expect(estimateWaitMs({ position: 0, maxConcurrent: 10, sessionLengthMs: 1000 })).toBe(0) + expect(estimateWaitMs({ position: 5, maxConcurrent: 0, sessionLengthMs: 1000 })).toBe(0) + }) +}) + +describe('toSessionStateResponse', () => { + const now = new Date('2026-04-17T12:00:00Z') + + test('returns null when row is null', () => { + const view = toSessionStateResponse({ + row: null, + position: 0, + queueDepth: 0, + maxConcurrent: MAX_CONC, + sessionLengthMs: SESSION_LEN, + now, + }) + expect(view).toBeNull() + }) + + test('queued row maps to queued response with position + wait estimate', () => { + const view = toSessionStateResponse({ + row: row({ status: 'queued' }), + position: 51, + queueDepth: 100, + maxConcurrent: MAX_CONC, + sessionLengthMs: SESSION_LEN, + now, + }) + expect(view).toEqual({ + status: 'queued', + instanceId: 'inst-1', + position: 51, + queueDepth: 100, + estimatedWaitMs: SESSION_LEN, + queuedAt: now.toISOString(), + }) + }) + + test('active unexpired row maps to active response with remaining ms', () => { + const admittedAt = new Date(now.getTime() - 10 * 60_000) + const expiresAt = new Date(now.getTime() + 50 * 60_000) + const view = toSessionStateResponse({ + row: row({ status: 'active', admitted_at: admittedAt, expires_at: expiresAt }), + position: 0, + queueDepth: 0, + maxConcurrent: MAX_CONC, + sessionLengthMs: SESSION_LEN, + now, + }) + expect(view).toEqual({ + status: 'active', + instanceId: 'inst-1', + admittedAt: admittedAt.toISOString(), + expiresAt: expiresAt.toISOString(), + remainingMs: 50 * 60_000, + }) + }) + + test('active but expired row maps to null (caller should re-queue)', () => { + const view = toSessionStateResponse({ + row: row({ status: 'active', admitted_at: now, expires_at: new Date(now.getTime() - 1) }), + position: 0, + queueDepth: 0, + maxConcurrent: MAX_CONC, + sessionLengthMs: SESSION_LEN, + now, + }) + expect(view).toBeNull() + }) +}) diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts new file mode 100644 index 0000000000..0bc9a2dfd3 --- /dev/null +++ b/web/src/server/free-session/admission.ts @@ -0,0 +1,175 @@ +import { + ADMISSION_TICK_MS, + MAX_ADMITS_PER_TICK, + getMaxConcurrentSessions, + getSessionLengthMs, + isWaitingRoomEnabled, +} from './config' +import { admitFromQueue, countActive, queueDepth, sweepExpired } from './store' + +import { isFireworksAdmissible } from '@/server/fireworks-monitor/monitor' +import { logger } from '@/util/logger' + +interface AdmissionState { + timer: ReturnType | null + inFlight: Promise | null + tickCount: number +} + +let state: AdmissionState | null = null + +/** Emit a `[FreeSessionAdmission] snapshot` log every N ticks even when + * nothing changed, so dashboards / alerts have a reliable heartbeat of + * queue depth and active count. At ADMISSION_TICK_MS=5s, 12 ticks = 1 min. */ +const SNAPSHOT_EVERY_N_TICKS = 12 + +export interface AdmissionDeps { + sweepExpired: (now: Date) => Promise + countActive: (now: Date) => Promise + queueDepth: () => Promise + admitFromQueue: (params: { + limit: number + sessionLengthMs: number + now: Date + }) => Promise<{ user_id: string }[]> + isFireworksAdmissible: () => boolean + getMaxConcurrentSessions: () => number + getSessionLengthMs: () => number + now?: () => Date +} + +const defaultDeps: AdmissionDeps = { + sweepExpired, + countActive, + queueDepth, + admitFromQueue, + isFireworksAdmissible, + getMaxConcurrentSessions, + getSessionLengthMs, +} + +export interface AdmissionTickResult { + expired: number + admitted: number + active: number + queueDepth: number + skipped: 'health' | 'full' | null +} + +/** + * Run a single admission tick: + * 1. Expire sessions past their expires_at. + * 2. If Fireworks is not 'healthy', skip admission (waiting queue grows). + * 3. Admit up to (maxConcurrent - activeCount, MAX_ADMITS_PER_TICK) users. + * + * Returns counts for observability. Safe to call concurrently across pods — + * the underlying admit query takes an advisory xact lock. + */ +export async function runAdmissionTick( + deps: AdmissionDeps = defaultDeps, +): Promise { + const now = (deps.now ?? (() => new Date()))() + const expired = await deps.sweepExpired(now) + + if (!deps.isFireworksAdmissible()) { + const [active, depth] = await Promise.all([ + deps.countActive(now), + deps.queueDepth(), + ]) + return { expired, admitted: 0, active, queueDepth: depth, skipped: 'health' } + } + + const active = await deps.countActive(now) + const max = deps.getMaxConcurrentSessions() + const capacity = Math.min(Math.max(0, max - active), MAX_ADMITS_PER_TICK) + if (capacity === 0) { + const depth = await deps.queueDepth() + return { expired, admitted: 0, active, queueDepth: depth, skipped: 'full' } + } + + const admitted = await deps.admitFromQueue({ + limit: capacity, + sessionLengthMs: deps.getSessionLengthMs(), + now, + }) + + const depth = await deps.queueDepth() + return { + expired, + admitted: admitted.length, + active: active + admitted.length, + queueDepth: depth, + skipped: null, + } +} + +function scheduleNext() { + if (!state) return + const timer = setTimeout(runTick, ADMISSION_TICK_MS) + if (typeof timer.unref === 'function') timer.unref() + state.timer = timer +} + +function runTick() { + if (!state) return + // If a tick is still inflight (previous tick ran long), skip without + // rescheduling — the inflight Promise's finally will schedule the next one. + // This prevents overlapping timers piling up. + if (state.inFlight) return + + const tickIdx = ++state.tickCount + state.inFlight = runAdmissionTick() + .then((result) => { + const changed = result.admitted > 0 || result.expired > 0 + const heartbeat = tickIdx % SNAPSHOT_EVERY_N_TICKS === 0 + if (changed || heartbeat || result.skipped === 'health') { + logger.info( + { + admitted: result.admitted, + expired: result.expired, + active: result.active, + queueDepth: result.queueDepth, + maxConcurrent: getMaxConcurrentSessions(), + skipped: result.skipped, + }, + changed ? '[FreeSessionAdmission] tick' : '[FreeSessionAdmission] snapshot', + ) + } + }) + .catch((error) => { + logger.warn( + { error: error instanceof Error ? error.message : String(error) }, + '[FreeSessionAdmission] tick failed', + ) + }) + .finally(() => { + if (!state) return + state.inFlight = null + scheduleNext() + }) +} + +export function startFreeSessionAdmission(): boolean { + if (state) return true + if (!isWaitingRoomEnabled()) { + logger.info({}, '[FreeSessionAdmission] Waiting room disabled — ticker not started') + return false + } + state = { timer: null, inFlight: null, tickCount: 0 } + runTick() + logger.info( + { tickMs: ADMISSION_TICK_MS, maxConcurrent: getMaxConcurrentSessions() }, + '[FreeSessionAdmission] Started', + ) + return true +} + +export function stopFreeSessionAdmission(): void { + if (!state) return + if (state.timer) clearTimeout(state.timer) + state = null +} + +export function __resetFreeSessionAdmissionForTests(): void { + stopFreeSessionAdmission() +} diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts new file mode 100644 index 0000000000..1fc5dc1424 --- /dev/null +++ b/web/src/server/free-session/config.ts @@ -0,0 +1,29 @@ +import { env } from '@codebuff/internal/env' + +/** + * Advisory lock ID claimed by the admission tick so only one pod admits + * users at a time. Unique magic number — keep in sync with + * packages/internal/src/db/advisory-lock.ts if centralising later. + */ +export const FREEBUFF_ADMISSION_LOCK_ID = 573924815 + +/** Admission tick cadence. Fast enough to drain the queue promptly, slow + * enough to avoid DB churn. */ +export const ADMISSION_TICK_MS = 5_000 + +/** Max users admitted in a single tick. Protects against thundering-herd + * admissions when capacity frees up all at once (e.g. after a Fireworks + * incident clears). */ +export const MAX_ADMITS_PER_TICK = 20 + +export function isWaitingRoomEnabled(): boolean { + return env.FREEBUFF_WAITING_ROOM_ENABLED +} + +export function getSessionLengthMs(): number { + return env.FREEBUFF_SESSION_LENGTH_MS +} + +export function getMaxConcurrentSessions(): number { + return env.FREEBUFF_MAX_CONCURRENT_SESSIONS +} diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts new file mode 100644 index 0000000000..b0e19b7ca9 --- /dev/null +++ b/web/src/server/free-session/public-api.ts @@ -0,0 +1,184 @@ +import { + getMaxConcurrentSessions, + getSessionLengthMs, + isWaitingRoomEnabled, +} from './config' +import { + endSession, + getSessionRow, + joinOrTakeOver, + queueDepth, + queuePositionFor, +} from './store' +import { toSessionStateResponse } from './session-view' + +import type { InternalSessionRow, SessionStateResponse } from './types' + +export interface SessionDeps { + getSessionRow: (userId: string) => Promise + joinOrTakeOver: (params: { userId: string; now: Date }) => Promise + endSession: (userId: string) => Promise + queueDepth: () => Promise + queuePositionFor: (params: { userId: string; queuedAt: Date }) => Promise + isWaitingRoomEnabled: () => boolean + getMaxConcurrentSessions: () => number + getSessionLengthMs: () => number + now?: () => Date +} + +const defaultDeps: SessionDeps = { + getSessionRow, + joinOrTakeOver, + endSession, + queueDepth, + queuePositionFor, + isWaitingRoomEnabled, + getMaxConcurrentSessions, + getSessionLengthMs, +} + +const nowOf = (deps: SessionDeps): Date => (deps.now ?? (() => new Date()))() + +async function viewForRow( + userId: string, + deps: SessionDeps, + row: InternalSessionRow, +): Promise { + const [position, depth] = + row.status === 'queued' + ? await Promise.all([ + deps.queuePositionFor({ userId, queuedAt: row.queued_at }), + deps.queueDepth(), + ]) + : [0, 0] + return toSessionStateResponse({ + row, + position, + queueDepth: depth, + maxConcurrent: deps.getMaxConcurrentSessions(), + sessionLengthMs: deps.getSessionLengthMs(), + now: nowOf(deps), + }) +} + +/** + * Client calls this on CLI startup. Semantics: + * - Waiting room disabled → { status: 'disabled' } + * - No existing session → create queued row, fresh instance_id + * - Existing active (unexpired) → rotate instance_id (takeover), preserve state + * - Existing queued → rotate instance_id, preserve queue position + * - Existing expired → re-queue at the back with fresh instance_id + */ +export async function requestSession(params: { + userId: string + deps?: SessionDeps +}): Promise { + const deps = params.deps ?? defaultDeps + if (!deps.isWaitingRoomEnabled()) return { status: 'disabled' } + + const row = await deps.joinOrTakeOver({ userId: params.userId, now: nowOf(deps) }) + // joinOrTakeOver always returns either a queued row or an active-valid row, + // both of which map to a non-null response. + const view = await viewForRow(params.userId, deps, row) + if (!view) { + throw new Error( + `unreachable: joinOrTakeOver returned unmappable row for user=${params.userId} status=${row.status} expires_at=${row.expires_at?.toISOString() ?? 'null'}`, + ) + } + return view +} + +/** + * Read-only check of the caller's current state. Does not mutate or rotate + * instance_id. Returns null when the user has no session row at all (or only + * an expired active row) — the CLI should interpret that as "call + * requestSession() first". + */ +export async function getSessionState(params: { + userId: string + deps?: SessionDeps +}): Promise { + const deps = params.deps ?? defaultDeps + if (!deps.isWaitingRoomEnabled()) return { status: 'disabled' } + const row = await deps.getSessionRow(params.userId) + if (!row) return null + return viewForRow(params.userId, deps, row) +} + +export async function endUserSession(params: { + userId: string + deps?: SessionDeps +}): Promise { + const deps = params.deps ?? defaultDeps + if (!deps.isWaitingRoomEnabled()) return + await deps.endSession(params.userId) +} + +export type SessionGateResult = + | { ok: true; reason: 'disabled' } + | { ok: true; reason: 'active'; remainingMs: number } + | { ok: false; code: 'waiting_room_required'; message: string } + | { ok: false; code: 'waiting_room_queued'; message: string } + | { ok: false; code: 'session_superseded'; message: string } + | { ok: false; code: 'session_expired'; message: string } + +/** + * Called from the chat/completions hot path for free-mode requests. Either + * returns `{ ok: true }` (request may proceed) or a structured rejection + * the caller translates into a 4xx response. + * + * Never trusts client timestamps. The caller supplies `claimedInstanceId` + * exactly as the CLI sent it; we compare against the server-stored + * active_instance_id. Does a single DB read (the row); we intentionally do + * NOT compute queue position on rejection — the client polls GET /session + * for that detail. + */ +export async function checkSessionAdmissible(params: { + userId: string + claimedInstanceId: string | null | undefined + deps?: SessionDeps +}): Promise { + const deps = params.deps ?? defaultDeps + if (!deps.isWaitingRoomEnabled()) return { ok: true, reason: 'disabled' } + + const row = await deps.getSessionRow(params.userId) + + if (!row) { + return { + ok: false, + code: 'waiting_room_required', + message: 'No active free session. Call POST /api/v1/freebuff/session first.', + } + } + + if (row.status === 'queued') { + return { + ok: false, + code: 'waiting_room_queued', + message: 'You are in the waiting room. Poll GET /api/v1/freebuff/session for your position.', + } + } + + const now = nowOf(deps) + if (!row.expires_at || row.expires_at.getTime() <= now.getTime()) { + return { + ok: false, + code: 'session_expired', + message: 'Your free session has expired. Re-join the waiting room via POST /api/v1/freebuff/session.', + } + } + + if (!params.claimedInstanceId || params.claimedInstanceId !== row.active_instance_id) { + return { + ok: false, + code: 'session_superseded', + message: 'Another instance of freebuff has taken over this session. Only one instance per account is allowed.', + } + } + + return { + ok: true, + reason: 'active', + remainingMs: row.expires_at.getTime() - now.getTime(), + } +} diff --git a/web/src/server/free-session/session-view.ts b/web/src/server/free-session/session-view.ts new file mode 100644 index 0000000000..6774b6d636 --- /dev/null +++ b/web/src/server/free-session/session-view.ts @@ -0,0 +1,66 @@ +import type { InternalSessionRow, SessionStateResponse } from './types' + +/** + * Pure function converting an internal session row (or absence thereof) into + * the public response shape. Never reads the clock — caller supplies `now` so + * behavior is deterministic under test. + */ +export function toSessionStateResponse(params: { + row: InternalSessionRow | null + position: number + queueDepth: number + maxConcurrent: number + sessionLengthMs: number + now: Date +}): SessionStateResponse | null { + const { row, position, queueDepth, maxConcurrent, sessionLengthMs, now } = params + if (!row) return null + + if (row.status === 'active' && row.expires_at && row.expires_at.getTime() > now.getTime()) { + return { + status: 'active', + instanceId: row.active_instance_id, + admittedAt: (row.admitted_at ?? row.created_at).toISOString(), + expiresAt: row.expires_at.toISOString(), + remainingMs: row.expires_at.getTime() - now.getTime(), + } + } + + if (row.status === 'queued') { + return { + status: 'queued', + instanceId: row.active_instance_id, + position, + queueDepth, + estimatedWaitMs: estimateWaitMs({ + position, + maxConcurrent, + sessionLengthMs, + }), + queuedAt: row.queued_at.toISOString(), + } + } + + // expired active — callers should treat as "no session" and re-queue + return null +} + +/** + * Upper-bound estimate: assumes full capacity and uniform session expiry. + * Real wait time is usually lower because sessions finish early. + * + * waitMs ≈ floor((position - 1) / maxConcurrent) * sessionLengthMs + * + * Position 1..maxConcurrent → 0ms (next admission tick will pick you up). + * Position maxConcurrent+1..2*maxConcurrent → one full session length. + */ +export function estimateWaitMs(params: { + position: number + maxConcurrent: number + sessionLengthMs: number +}): number { + const { position, maxConcurrent, sessionLengthMs } = params + if (position <= 0 || maxConcurrent <= 0) return 0 + const waves = Math.floor((position - 1) / maxConcurrent) + return waves * sessionLengthMs +} diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts new file mode 100644 index 0000000000..fdc6b14b1e --- /dev/null +++ b/web/src/server/free-session/store.ts @@ -0,0 +1,231 @@ +import { db } from '@codebuff/internal/db' +import * as schema from '@codebuff/internal/db/schema' +import { and, asc, count, eq, gt, inArray, lt, sql } from 'drizzle-orm' + +import { FREEBUFF_ADMISSION_LOCK_ID } from './config' + +import type { InternalSessionRow } from './types' + +/** Generate a cryptographically random instance id (token). */ +export function newInstanceId(): string { + return crypto.randomUUID() +} + +/** + * postgres.js under some configurations returns Postgres booleans as 't'/'f' + * strings rather than JS booleans. Mirrors the same coercion used in + * packages/internal/src/db/advisory-lock.ts. + */ +function coerceBool(value: unknown): boolean { + if (typeof value === 'boolean') return value + if (value === 't' || value === 'true' || value === 1) return true + return false +} + +export async function getSessionRow( + userId: string, +): Promise { + const row = await db.query.freeSession.findFirst({ + where: eq(schema.freeSession.user_id, userId), + }) + return (row as InternalSessionRow | undefined) ?? null +} + +/** + * Join the queue (or take over an existing row with a new instance_id). + * + * Semantics: + * - If no row exists: insert status=queued, fresh instance_id, queued_at=now. + * - If row exists and active+unexpired: rotate instance_id (takeover), + * preserve status/admitted_at/expires_at. + * - If row exists and expired: reset to queued with fresh instance_id + * and fresh queued_at — effectively re-queue at the back. + * - If row exists and already queued: rotate instance_id, preserve + * queued_at so user keeps their place in line. + * + * Never trusts client-supplied timestamps or instance ids. + */ +export async function joinOrTakeOver(params: { + userId: string + now: Date +}): Promise { + const { userId, now } = params + const nextInstanceId = newInstanceId() + + // Single UPSERT that encodes every case in one round-trip, race-safe + // against concurrent POSTs for the same user (the PK would otherwise turn + // two parallel INSERTs into a 500). Inside ON CONFLICT DO UPDATE, bare + // column references resolve to the existing row. + // + // Decision table (pre-update state → post-update state): + // no row → INSERT: status=queued, queued_at=now + // active & expires_at > now → rotate instance_id only (takeover) + // queued → rotate instance_id, preserve queued_at + // active & expired → re-queue at back: status=queued, + // queued_at=now, admitted_at/expires_at=null + const activeUnexpired = sql`${schema.freeSession.status} = 'active' AND ${schema.freeSession.expires_at} > ${now}` + + const [row] = await db + .insert(schema.freeSession) + .values({ + user_id: userId, + status: 'queued', + active_instance_id: nextInstanceId, + queued_at: now, + created_at: now, + updated_at: now, + }) + .onConflictDoUpdate({ + target: schema.freeSession.user_id, + set: { + active_instance_id: nextInstanceId, + updated_at: now, + status: sql`CASE WHEN ${activeUnexpired} THEN 'active'::free_session_status ELSE 'queued'::free_session_status END`, + queued_at: sql`CASE + WHEN ${schema.freeSession.status} = 'queued' THEN ${schema.freeSession.queued_at} + WHEN ${activeUnexpired} THEN ${schema.freeSession.queued_at} + ELSE ${now} + END`, + admitted_at: sql`CASE WHEN ${activeUnexpired} THEN ${schema.freeSession.admitted_at} ELSE NULL END`, + expires_at: sql`CASE WHEN ${activeUnexpired} THEN ${schema.freeSession.expires_at} ELSE NULL END`, + }, + }) + .returning() + + if (!row) { + throw new Error(`joinOrTakeOver returned no row for user=${userId}`) + } + return row as InternalSessionRow +} + +export async function endSession(userId: string): Promise { + await db + .delete(schema.freeSession) + .where(eq(schema.freeSession.user_id, userId)) +} + +/** + * Count active non-expired sessions. Callers must already have expired old + * rows via sweepExpired() for this number to be accurate. + */ +export async function countActive(now: Date): Promise { + const rows = await db + .select({ n: count() }) + .from(schema.freeSession) + .where( + and( + eq(schema.freeSession.status, 'active'), + gt(schema.freeSession.expires_at, now), + ), + ) + return Number(rows[0]?.n ?? 0) +} + +export async function queueDepth(): Promise { + const rows = await db + .select({ n: count() }) + .from(schema.freeSession) + .where(eq(schema.freeSession.status, 'queued')) + return Number(rows[0]?.n ?? 0) +} + +/** + * 1-indexed position in the FIFO queue for a known-queued row. Ties on + * queued_at are broken deterministically by user_id. Callers already holding + * the row should prefer queuePositionFor() to skip the extra lookup. + */ +export async function queuePosition(userId: string): Promise { + const me = await db.query.freeSession.findFirst({ + where: eq(schema.freeSession.user_id, userId), + }) + if (!me || me.status !== 'queued') return 0 + return queuePositionFor({ userId, queuedAt: me.queued_at }) +} + +export async function queuePositionFor(params: { + userId: string + queuedAt: Date +}): Promise { + const rows = await db + .select({ n: count() }) + .from(schema.freeSession) + .where( + and( + eq(schema.freeSession.status, 'queued'), + sql`(${schema.freeSession.queued_at}, ${schema.freeSession.user_id}) <= (${params.queuedAt}, ${params.userId})`, + ), + ) + return Number(rows[0]?.n ?? 0) +} + +/** Remove rows whose active session has expired. Safe to call repeatedly. */ +export async function sweepExpired(now: Date): Promise { + const deleted = await db + .delete(schema.freeSession) + .where( + and( + eq(schema.freeSession.status, 'active'), + lt(schema.freeSession.expires_at, now), + ), + ) + .returning({ user_id: schema.freeSession.user_id }) + return deleted.length +} + +/** + * Atomically admit up to `limit` queued users, guarded by a per-transaction + * advisory lock so only one pod admits at a time. Returns admitted rows. + * + * If the advisory lock is already held, returns []. Caller should treat that + * as "another pod is handling it, skip this tick". + */ +export async function admitFromQueue(params: { + limit: number + sessionLengthMs: number + now: Date +}): Promise { + const { limit, sessionLengthMs, now } = params + if (limit <= 0) return [] + + return db.transaction(async (tx) => { + const lockResult = await tx.execute<{ acquired: unknown }>( + sql`SELECT pg_try_advisory_xact_lock(${FREEBUFF_ADMISSION_LOCK_ID}) AS acquired`, + ) + // postgres-js returns an array-like; coerceBool handles the 't'/'f' string + // case that the driver emits under some configurations. + if (!coerceBool((lockResult as unknown as Array<{ acquired: unknown }>)[0]?.acquired)) { + return [] + } + + const candidates = await tx + .select({ user_id: schema.freeSession.user_id }) + .from(schema.freeSession) + .where(eq(schema.freeSession.status, 'queued')) + .orderBy(asc(schema.freeSession.queued_at), asc(schema.freeSession.user_id)) + .limit(limit) + .for('update', { skipLocked: true }) + + if (candidates.length === 0) return [] + + const expiresAt = new Date(now.getTime() + sessionLengthMs) + const userIds = candidates.map((c) => c.user_id) + + const admitted = await tx + .update(schema.freeSession) + .set({ + status: 'active', + admitted_at: now, + expires_at: expiresAt, + updated_at: now, + }) + .where( + and( + eq(schema.freeSession.status, 'queued'), + inArray(schema.freeSession.user_id, userIds), + ), + ) + .returning() + + return admitted as InternalSessionRow[] + }) +} diff --git a/web/src/server/free-session/types.ts b/web/src/server/free-session/types.ts new file mode 100644 index 0000000000..858bd63100 --- /dev/null +++ b/web/src/server/free-session/types.ts @@ -0,0 +1,36 @@ +export type FreeSessionStatus = 'queued' | 'active' + +/** Public state returned to CLI clients. */ +export type SessionStateResponse = + | { + status: 'disabled' + /** Waiting room is globally off; free-mode requests flow through + * unchanged. Client should treat this as "admitted forever". */ + } + | { + status: 'queued' + instanceId: string + /** 1-indexed position in the FIFO queue. */ + position: number + queueDepth: number + estimatedWaitMs: number + queuedAt: string + } + | { + status: 'active' + instanceId: string + admittedAt: string + expiresAt: string + remainingMs: number + } + +export interface InternalSessionRow { + user_id: string + status: FreeSessionStatus + active_instance_id: string + queued_at: Date + admitted_at: Date | null + expires_at: Date | null + created_at: Date + updated_at: Date +} From 41ffbab8fb06251f4b82845042c94ac45f0a322f Mon Sep 17 00:00:00 2001 From: James Grugett Date: Fri, 17 Apr 2026 18:59:31 -0700 Subject: [PATCH 02/31] Freebuff waiting room client --- cli/src/app.tsx | 92 ++++++- .../components/freebuff-superseded-screen.tsx | 59 +++++ cli/src/components/waiting-room-screen.tsx | 200 +++++++++++++++ .../helpers/__tests__/send-message.test.ts | 138 ++++++++++ cli/src/hooks/helpers/send-message.ts | 56 +++++ cli/src/hooks/use-freebuff-session.ts | 235 ++++++++++++++++++ cli/src/hooks/use-send-message.ts | 5 + cli/src/state/freebuff-session-store.ts | 43 ++++ cli/src/types/freebuff-session.ts | 33 +++ cli/src/utils/create-run-config.ts | 3 + cli/src/utils/error-handling.ts | 34 +++ common/src/types/contracts/llm.ts | 4 + .../agent-runtime/src/prompt-agent-stream.ts | 3 + .../tools/handlers/tool/spawn-agent-utils.ts | 2 + .../src/db/migrations/meta/_journal.json | 7 + packages/internal/src/db/schema.ts | 62 +++++ packages/internal/src/env-schema.ts | 16 ++ .../provider-options-metadata.test.ts | 72 ++++++ sdk/src/impl/llm.ts | 7 +- sdk/src/run.ts | 6 + 20 files changed, 1074 insertions(+), 3 deletions(-) create mode 100644 cli/src/components/freebuff-superseded-screen.tsx create mode 100644 cli/src/components/waiting-room-screen.tsx create mode 100644 cli/src/hooks/use-freebuff-session.ts create mode 100644 cli/src/state/freebuff-session-store.ts create mode 100644 cli/src/types/freebuff-session.ts create mode 100644 sdk/src/impl/__tests__/provider-options-metadata.test.ts diff --git a/cli/src/app.tsx b/cli/src/app.tsx index cd21fa8e43..7c4c631059 100644 --- a/cli/src/app.tsx +++ b/cli/src/app.tsx @@ -4,11 +4,14 @@ import { useShallow } from 'zustand/react/shallow' import { Chat } from './chat' import { ChatHistoryScreen } from './components/chat-history-screen' +import { FreebuffSupersededScreen } from './components/freebuff-superseded-screen' import { LoginModal } from './components/login-modal' import { ProjectPickerScreen } from './components/project-picker-screen' import { TerminalLink } from './components/terminal-link' +import { WaitingRoomScreen } from './components/waiting-room-screen' import { useAuthQuery } from './hooks/use-auth-query' import { useAuthState } from './hooks/use-auth-state' +import { useFreebuffSession } from './hooks/use-freebuff-session' import { useLogo } from './hooks/use-logo' import { useSheenAnimation } from './hooks/use-sheen-animation' import { useTerminalDimensions } from './hooks/use-terminal-dimensions' @@ -297,8 +300,8 @@ export const App = ({ const chatKey = resumeChatId ?? 'current' return ( - ) } + +interface AuthedSurfaceProps { + chatKey: string + headerContent: React.ReactNode + initialPrompt: string | null + agentId?: string + fileTree: FileTreeNode[] + inputRef: React.MutableRefObject + setIsAuthenticated: React.Dispatch> + setUser: React.Dispatch> + logoutMutation: ReturnType['logoutMutation'] + continueChat: boolean + continueChatId: string | undefined + authStatus: AuthStatus + initialMode: AgentMode | undefined + gitRoot: string | null | undefined + onSwitchToGitRoot: () => void +} + +/** + * Rendered only after auth is confirmed. Owns the freebuff waiting-room gate + * so `useFreebuffSession` runs exactly once per authed session (not before + * we have a token). + */ +const AuthedSurface = ({ + chatKey, + headerContent, + initialPrompt, + agentId, + fileTree, + inputRef, + setIsAuthenticated, + setUser, + logoutMutation, + continueChat, + continueChatId, + authStatus, + initialMode, + gitRoot, + onSwitchToGitRoot, +}: AuthedSurfaceProps) => { + const { session, error: sessionError } = useFreebuffSession() + + // Terminal state: a 409 from the gate means another CLI rotated our + // instance id. Show a dedicated screen and stop polling — don't fall back + // into the waiting room, which would look like normal queued progress. + if (IS_FREEBUFF && session?.status === 'superseded') { + return + } + + // Route every non-admitted state through the waiting room: + // null → initial POST in flight + // 'queued' → waiting our turn + // 'none' → server lost our row; hook is about to re-POST + // Falling through to on 'none' would leave the user unable to send + // any free-mode request until the next poll cycle. + if ( + IS_FREEBUFF && + (session === null || + session.status === 'queued' || + session.status === 'none') + ) { + return + } + + return ( + + ) +} diff --git a/cli/src/components/freebuff-superseded-screen.tsx b/cli/src/components/freebuff-superseded-screen.tsx new file mode 100644 index 0000000000..bd730b3c66 --- /dev/null +++ b/cli/src/components/freebuff-superseded-screen.tsx @@ -0,0 +1,59 @@ +import { TextAttributes } from '@opentui/core' +import React from 'react' + +import { useLogo } from '../hooks/use-logo' +import { useTerminalDimensions } from '../hooks/use-terminal-dimensions' +import { useTheme } from '../hooks/use-theme' +import { getLogoAccentColor, getLogoBlockColor } from '../utils/theme-system' + +/** + * Terminal state shown after a 409 session_superseded response. Another CLI on + * the same account rotated our instance id and we've stopped polling — the + * user needs to close the other instance and restart. + */ +export const FreebuffSupersededScreen: React.FC = () => { + const theme = useTheme() + const { contentMaxWidth } = useTerminalDimensions() + const blockColor = getLogoBlockColor(theme.name) + const accentColor = getLogoAccentColor(theme.name) + const { component: logoComponent } = useLogo({ + availableWidth: contentMaxWidth, + accentColor, + blockColor, + }) + + return ( + + {logoComponent} + + Another freebuff instance took over this account. + + + Only one CLI per account can be active at a time. + + + Close the other instance, then restart freebuff here. + + + + Press Ctrl+C to exit. + + + + ) +} diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx new file mode 100644 index 0000000000..ce97e359e5 --- /dev/null +++ b/cli/src/components/waiting-room-screen.tsx @@ -0,0 +1,200 @@ +import { TextAttributes } from '@opentui/core' +import { useRenderer } from '@opentui/react' +import React, { useEffect, useMemo, useState } from 'react' + +import { AdBanner } from './ad-banner' +import { ChoiceAdBanner } from './choice-ad-banner' +import { ShimmerText } from './shimmer-text' +import { useGravityAd } from '../hooks/use-gravity-ad' +import { useLogo } from '../hooks/use-logo' +import { useSheenAnimation } from '../hooks/use-sheen-animation' +import { useTerminalDimensions } from '../hooks/use-terminal-dimensions' +import { useTheme } from '../hooks/use-theme' +import { getLogoAccentColor, getLogoBlockColor } from '../utils/theme-system' + +import type { FreebuffSessionResponse } from '../types/freebuff-session' + +interface WaitingRoomScreenProps { + session: FreebuffSessionResponse | null + error: string | null +} + +const formatWait = (ms: number): string => { + if (!Number.isFinite(ms) || ms <= 0) return 'any moment now' + const totalSeconds = Math.round(ms / 1000) + if (totalSeconds < 60) return `~${totalSeconds}s` + const minutes = Math.round(totalSeconds / 60) + if (minutes < 60) return `~${minutes} min` + const hours = Math.floor(minutes / 60) + const rem = minutes % 60 + return rem === 0 ? `~${hours}h` : `~${hours}h ${rem}m` +} + +const formatElapsed = (ms: number): string => { + if (!Number.isFinite(ms) || ms < 0) return '0s' + const totalSeconds = Math.floor(ms / 1000) + const minutes = Math.floor(totalSeconds / 60) + const seconds = totalSeconds % 60 + if (minutes === 0) return `${seconds}s` + return `${minutes}m ${seconds.toString().padStart(2, '0')}s` +} + +export const WaitingRoomScreen: React.FC = ({ + session, + error, +}) => { + const theme = useTheme() + const renderer = useRenderer() + const { terminalWidth, contentMaxWidth } = useTerminalDimensions() + + const [sheenPosition, setSheenPosition] = useState(0) + const blockColor = getLogoBlockColor(theme.name) + const accentColor = getLogoAccentColor(theme.name) + const { applySheenToChar } = useSheenAnimation({ + logoColor: theme.foreground, + accentColor, + blockColor, + terminalWidth: renderer?.width ?? terminalWidth, + sheenPosition, + setSheenPosition, + }) + const { component: logoComponent } = useLogo({ + availableWidth: contentMaxWidth, + accentColor, + blockColor, + applySheenToChar, + }) + + // Always enable ads in the waiting room — this is where monetization lives. + const { ad, adData, recordImpression } = useGravityAd({ enabled: true }) + + // Elapsed-in-queue timer. Starts from `queuedAt` so it keeps ticking even if + // the user wanders away and comes back. + const queuedAtMs = useMemo(() => { + if (session?.status === 'queued') return Date.parse(session.queuedAt) + return null + }, [session]) + const [now, setNow] = useState(() => Date.now()) + useEffect(() => { + const id = setInterval(() => setNow(Date.now()), 1000) + return () => clearInterval(id) + }, []) + const elapsedMs = queuedAtMs ? now - queuedAtMs : 0 + + const isQueued = session?.status === 'queued' + + return ( + + + {logoComponent} + + + {error && !session && ( + + ⚠ {error} + + )} + + {((!session && !error) || session?.status === 'none') && ( + + + + )} + + {isQueued && session && ( + <> + + + + + + + Position{' '} + + {session.position} + + of {session.queueDepth} + + + Estimated wait:{' '} + + {formatWait(session.estimatedWaitMs)} + + + + Waiting for {formatElapsed(elapsedMs)} + + + + + + Leave this window open — we'll ding when your session starts. + + + + )} + + {/* Server says the waiting room is disabled — this screen should not + normally render in that case, but show a minimal message just in + case App.tsx's guard is bypassed. */} + {session?.status === 'disabled' && ( + Waiting room disabled. + )} + + + + {/* Ad banner pinned to the bottom, same look-and-feel as in chat. */} + {ad && ( + + {adData?.variant === 'choice' ? ( + + ) : ( + {}} isFreeMode /> + )} + + )} + + {/* Horizontal separator (mirrors chat input divider style) */} + {!ad && ( + + {'─'.repeat(terminalWidth)} + + )} + + ) +} diff --git a/cli/src/hooks/helpers/__tests__/send-message.test.ts b/cli/src/hooks/helpers/__tests__/send-message.test.ts index 7e6e12da1a..9ffe1fdc74 100644 --- a/cli/src/hooks/helpers/__tests__/send-message.test.ts +++ b/cli/src/hooks/helpers/__tests__/send-message.test.ts @@ -1540,3 +1540,141 @@ describe('resetEarlyReturnState', () => { }) }) }) + +describe('freebuff gate errors', () => { + const makeUpdater = (messages: ChatMessage[]) => { + const updater = createBatchedMessageUpdater('ai-1', (fn: any) => { + const next = fn(messages) + messages.length = 0 + messages.push(...next) + }) + return updater + } + + const baseMessage = (): ChatMessage[] => [{ + id: 'ai-1', + variant: 'ai', + content: '', + blocks: [], + timestamp: 'now', + }] + + const gateError = (kind: string, statusCode: number) => ({ + error: kind, + statusCode, + message: 'server said so', + }) + + test('handleRunError maps 409 session_superseded to the restart-required message', () => { + const messages = baseMessage() + const updater = makeUpdater(messages) + handleRunError({ + error: gateError('session_superseded', 409), + timerController: createMockTimerController(), + updater, + setIsRetrying: () => {}, + setStreamStatus: () => {}, + setCanProcessQueue: () => {}, + updateChainInProgress: () => {}, + }) + updater.flush() + expect(messages[0].userError).toContain('Another freebuff CLI took over') + }) + + test('handleRunError maps 410 session_expired to the rejoining message', () => { + const messages = baseMessage() + const updater = makeUpdater(messages) + handleRunError({ + error: gateError('session_expired', 410), + timerController: createMockTimerController(), + updater, + setIsRetrying: () => {}, + setStreamStatus: () => {}, + setCanProcessQueue: () => {}, + updateChainInProgress: () => {}, + }) + updater.flush() + expect(messages[0].userError).toContain('no longer active') + }) + + test('handleRunError maps 428 waiting_room_required to the rejoining message', () => { + const messages = baseMessage() + const updater = makeUpdater(messages) + handleRunError({ + error: gateError('waiting_room_required', 428), + timerController: createMockTimerController(), + updater, + setIsRetrying: () => {}, + setStreamStatus: () => {}, + setCanProcessQueue: () => {}, + updateChainInProgress: () => {}, + }) + updater.flush() + expect(messages[0].userError).toContain('no longer active') + }) + + test('handleRunError maps 429 waiting_room_queued to the still-queued message', () => { + const messages = baseMessage() + const updater = makeUpdater(messages) + handleRunError({ + error: gateError('waiting_room_queued', 429), + timerController: createMockTimerController(), + updater, + setIsRetrying: () => {}, + setStreamStatus: () => {}, + setCanProcessQueue: () => {}, + updateChainInProgress: () => {}, + }) + updater.flush() + expect(messages[0].userError).toContain('still in the waiting room') + }) + + test('handleRunError ignores gate-shaped errors with non-matching status code', () => { + // An error body with error: 'session_superseded' but a 500 status should + // NOT be classified as a gate error (prevents generic 5xx from mimicking + // the structured gate responses). + const messages = baseMessage() + const updater = makeUpdater(messages) + handleRunError({ + error: { error: 'session_superseded', statusCode: 500, message: 'oops' }, + timerController: createMockTimerController(), + updater, + setIsRetrying: () => {}, + setStreamStatus: () => {}, + setCanProcessQueue: () => {}, + updateChainInProgress: () => {}, + }) + updater.flush() + expect(messages[0].userError).toBe('oops') + expect(messages[0].userError).not.toContain('took over') + }) + + test('handleRunCompletion with gate error output routes through the gate handler', () => { + const messages = baseMessage() + const updater = makeUpdater(messages) + const runState: RunState = { + sessionState: undefined as any, + output: { + type: 'error', + message: 'server said so', + error: 'session_expired', + statusCode: 410, + } as any, + } + handleRunCompletion({ + runState, + actualCredits: undefined, + agentMode: 'FREE', + timerController: createMockTimerController(), + updater, + aiMessageId: 'ai-1', + wasAbortedByUser: false, + setStreamStatus: () => {}, + setCanProcessQueue: () => {}, + updateChainInProgress: () => {}, + setHasReceivedPlanResponse: () => {}, + }) + updater.flush() + expect(messages[0].userError).toContain('no longer active') + }) +}) diff --git a/cli/src/hooks/helpers/send-message.ts b/cli/src/hooks/helpers/send-message.ts index 948ae96c5a..f85bd4b9af 100644 --- a/cli/src/hooks/helpers/send-message.ts +++ b/cli/src/hooks/helpers/send-message.ts @@ -1,10 +1,15 @@ import { getErrorObject } from '@codebuff/common/util/error' +import { + markFreebuffSessionSuperseded, + refreshFreebuffSession, +} from '../use-freebuff-session' import { getProjectRoot } from '../../project-files' import { useChatStore } from '../../state/chat-store' import { processBashContext } from '../../utils/bash-context-processor' import { markRunningAgentsAsCancelled } from '../../utils/block-operations' import { + getFreebuffGateErrorKind, isOutOfCreditsError, isFreeModeUnavailableError, OUT_OF_CREDITS_MESSAGE, @@ -387,6 +392,13 @@ export const handleRunCompletion = (params: { return } + const gateKind = getFreebuffGateErrorKind(output) + if (gateKind) { + handleFreebuffGateError(gateKind, updater) + finalizeAfterError() + return + } + // Pass the raw error message to setError (displayed in UserErrorBanner without additional wrapper formatting) updater.setError(output.message ?? DEFAULT_RUN_OUTPUT_ERROR_MESSAGE) @@ -474,7 +486,51 @@ export const handleRunError = (params: { return } + const gateKind = getFreebuffGateErrorKind(error) + if (gateKind) { + handleFreebuffGateError(gateKind, updater) + return + } + // Use setError for all errors so they display in UserErrorBanner consistently const errorMessage = errorInfo.message || 'An unexpected error occurred' updater.setError(errorMessage) } + +/** + * Surface + recover from a waiting-room gate rejection. The server rejected + * the request because our seat is no longer valid; update local state so the + * UI reflects reality and we stop sending requests until we re-admit. + */ +function handleFreebuffGateError( + kind: ReturnType, + updater: BatchedMessageUpdater, +) { + switch (kind) { + case 'waiting_room_required': + case 'session_expired': + updater.setError( + 'Your freebuff session is no longer active. Rejoining the waiting room…', + ) + // Re-POST asynchronously; UI flips back to the waiting room as soon as + // the store picks up status: 'queued'. + refreshFreebuffSession().catch(() => {}) + return + case 'waiting_room_queued': + updater.setError( + "You're still in the waiting room. Please wait for admission before sending messages.", + ) + refreshFreebuffSession().catch(() => {}) + return + case 'session_superseded': + updater.setError( + 'Another freebuff CLI took over this account. Close the other instance, then restart.', + ) + // Terminal state: stop polling and flip UI to a "please restart" screen + // so we don't silently fight the other instance for the seat. + markFreebuffSessionSuperseded() + return + default: + return + } +} diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts new file mode 100644 index 0000000000..234ef994b9 --- /dev/null +++ b/cli/src/hooks/use-freebuff-session.ts @@ -0,0 +1,235 @@ +import { env } from '@codebuff/common/env' +import { useEffect } from 'react' + +import { useFreebuffSessionStore } from '../state/freebuff-session-store' +import { getAuthTokenDetails } from '../utils/auth' +import { IS_FREEBUFF } from '../utils/constants' +import { logger } from '../utils/logger' + +import type { + FreebuffSessionResponse, + FreebuffSessionServerResponse, +} from '../types/freebuff-session' + +const POLL_INTERVAL_QUEUED_MS = 5_000 +const POLL_INTERVAL_ACTIVE_MS = 30_000 +const POLL_INTERVAL_ERROR_MS = 10_000 + +/** Play the terminal bell so users get an audible notification on admission. */ +const playAdmissionSound = () => { + try { + process.stdout.write('\x07') + } catch { + // Silent fallback — some terminals/pipes disallow writing to stdout. + } +} + +const sessionEndpoint = (): string => { + const base = (env.NEXT_PUBLIC_CODEBUFF_APP_URL || 'https://codebuff.com').replace(/\/$/, '') + return `${base}/api/v1/freebuff/session` +} + +async function callSession( + method: 'POST' | 'GET' | 'DELETE', + token: string, + signal?: AbortSignal, +): Promise { + const resp = await fetch(sessionEndpoint(), { + method, + headers: { Authorization: `Bearer ${token}` }, + signal, + }) + if (!resp.ok) { + const text = await resp.text().catch(() => '') + throw new Error( + `freebuff session ${method} failed: ${resp.status} ${text.slice(0, 200)}`, + ) + } + return (await resp.json()) as FreebuffSessionServerResponse +} + +/** + * Decide which HTTP verb to use for the next poll. GET is cheap and does not + * rotate instance_id; POST is used whenever we don't (yet) have a valid seat — + * no session, server lost our row, or an active session expired. + */ +function nextMethod(current: FreebuffSessionResponse | null): 'POST' | 'GET' { + if (current?.status === 'queued' || current?.status === 'active') return 'GET' + return 'POST' +} + +function nextDelayMs(next: FreebuffSessionResponse): number | null { + switch (next.status) { + case 'queued': + return POLL_INTERVAL_QUEUED_MS + case 'active': + return POLL_INTERVAL_ACTIVE_MS + case 'none': + // Server lost our row / active session expired — POST again ASAP. + return 0 + case 'disabled': + case 'superseded': + return null + } +} + +interface UseFreebuffSessionResult { + session: FreebuffSessionResponse | null + error: string | null +} + +interface RefreshHandle { + refresh: (opts?: { forcePost?: boolean }) => Promise + markSuperseded: () => void +} + +/** + * Module-level handle to the active hook's poll driver. Set by the hook's + * effect on mount; cleared on unmount. Lets external callers (e.g. the + * chat-completions gate-error handler) request an immediate re-POST without + * re-plumbing a ref through the component tree. + */ +let activeRefreshHandle: RefreshHandle | null = null + +/** + * Imperatively re-sync the session with the server. Call this when the + * chat-completions gate tells us our seat is no longer valid (428, 410). + * The gate handler knows the server has no valid row for us, so we force a + * POST to re-queue immediately rather than waiting for a GET→'none'→POST + * round trip. + */ +export async function refreshFreebuffSession(): Promise { + if (!IS_FREEBUFF) return + await activeRefreshHandle?.refresh({ forcePost: true }) +} + +/** + * Flip the store into a terminal `superseded` state. Polling stops and the + * UI renders a dedicated "close the other CLI and restart" screen. Called + * after a 409 session_superseded so we don't silently fight the other + * instance for the seat. + */ +export function markFreebuffSessionSuperseded(): void { + if (!IS_FREEBUFF) return + activeRefreshHandle?.markSuperseded() +} + +/** + * Manages the freebuff waiting-room session lifecycle: + * - POST on mount to join the queue / rotate instance id + * - polls GET while queued (fast) or active (slow) to keep state fresh + * - re-POSTs when the server reports we have no row (`status: 'none'`) + * - DELETE on unmount so the slot frees up for the next user + * - plays a bell on transition from queued → active + * + * In non-freebuff builds the hook seeds `{ status: 'disabled' }` and exits. + */ +export function useFreebuffSession(): UseFreebuffSessionResult { + const session = useFreebuffSessionStore((s) => s.session) + const lastFetchError = useFreebuffSessionStore((s) => s.lastFetchError) + + useEffect(() => { + if (!IS_FREEBUFF) { + useFreebuffSessionStore.getState().setSession({ status: 'disabled' }) + return + } + + const { token } = getAuthTokenDetails() + if (!token) { + logger.warn( + {}, + '[freebuff-session] No auth token; skipping waiting-room admission', + ) + useFreebuffSessionStore.getState().setError('Not authenticated') + return + } + + let cancelled = false + let controller = new AbortController() + let timer: ReturnType | null = null + let previousStatus: FreebuffSessionResponse['status'] | null = null + + const clearTimer = () => { + if (timer) { + clearTimeout(timer) + timer = null + } + } + + const schedule = (ms: number) => { + if (cancelled) return + clearTimer() + timer = setTimeout(tick, ms) + } + + const tick = async (opts: { forcePost?: boolean } = {}) => { + if (cancelled) return + const current = useFreebuffSessionStore.getState().session + const method = opts.forcePost ? 'POST' : nextMethod(current) + try { + const next = await callSession(method, token, controller.signal) + if (cancelled) return + if (previousStatus === 'queued' && next.status === 'active') { + playAdmissionSound() + } + previousStatus = next.status + useFreebuffSessionStore.getState().setSession(next) + const delay = nextDelayMs(next) + if (delay !== null) schedule(delay) + } catch (error) { + if (cancelled || controller.signal.aborted) return + const msg = error instanceof Error ? error.message : String(error) + logger.warn({ error: msg }, '[freebuff-session] fetch failed') + useFreebuffSessionStore.getState().setError(msg) + schedule(POLL_INTERVAL_ERROR_MS) + } + } + + tick() + + activeRefreshHandle = { + refresh: async (opts) => { + clearTimer() + // Abort any in-flight fetch so it can't race us and overwrite state. + controller.abort() + controller = new AbortController() + if (opts?.forcePost) { + // Reset previousStatus so the queued→active bell still fires after a + // forced re-POST (we're intentionally leaving any stale active state + // behind — we know the seat is gone). + previousStatus = null + } + await tick(opts) + }, + markSuperseded: () => { + clearTimer() + controller.abort() + previousStatus = 'superseded' + useFreebuffSessionStore.getState().setSession({ status: 'superseded' }) + }, + } + + return () => { + cancelled = true + controller.abort() + clearTimer() + activeRefreshHandle = null + + // Fire-and-forget DELETE. Only release if we actually held a slot so we + // don't generate spurious DELETEs (e.g. HMR before POST completes). + const current = useFreebuffSessionStore.getState().session + if ( + current && + (current.status === 'queued' || current.status === 'active') + ) { + callSession('DELETE', token).catch(() => {}) + } + useFreebuffSessionStore.getState().reset() + } + }, []) + + return { + session, + error: lastFetchError, + } +} diff --git a/cli/src/hooks/use-send-message.ts b/cli/src/hooks/use-send-message.ts index 3583d7e5e4..915692151c 100644 --- a/cli/src/hooks/use-send-message.ts +++ b/cli/src/hooks/use-send-message.ts @@ -3,6 +3,7 @@ import { useCallback, useEffect, useRef } from 'react' import { setCurrentChatId } from '../project-files' import { createStreamController } from './stream-state' import { useChatStore } from '../state/chat-store' +import { getFreebuffInstanceId } from '../state/freebuff-session-store' import { getCodebuffClient } from '../utils/codebuff-client' import { AGENT_MODE_TO_ID, AGENT_MODE_TO_COST_MODE, IS_FREEBUFF } from '../utils/constants' import { createEventHandlerState } from '../utils/create-event-handler-state' @@ -445,6 +446,7 @@ export const useSendMessage = ({ }, }) + const freebuffInstanceId = getFreebuffInstanceId() const runConfig = createRunConfig({ logger, agent: resolvedAgent, @@ -455,6 +457,9 @@ export const useSendMessage = ({ eventHandlerState, signal: abortController.signal, costMode: AGENT_MODE_TO_COST_MODE[agentMode], + extraCodebuffMetadata: freebuffInstanceId + ? { freebuff_instance_id: freebuffInstanceId } + : undefined, }) logger.info({ runConfig }, '[send-message] Sending message with sdk run config') diff --git a/cli/src/state/freebuff-session-store.ts b/cli/src/state/freebuff-session-store.ts new file mode 100644 index 0000000000..ad42fc0078 --- /dev/null +++ b/cli/src/state/freebuff-session-store.ts @@ -0,0 +1,43 @@ +import { create } from 'zustand' + +import type { FreebuffSessionResponse } from '../types/freebuff-session' + +/** + * Snapshot of the waiting-room / active-session state reported by the server. + * Stored globally so both the waiting-room UI and the send-message path can + * read the current instance id without prop drilling. + */ +interface FreebuffSessionState { + session: FreebuffSessionResponse | null + lastFetchError: string | null +} + +interface FreebuffSessionActions { + setSession: (session: FreebuffSessionResponse) => void + setError: (error: string | null) => void + reset: () => void +} + +type FreebuffSessionStore = FreebuffSessionState & FreebuffSessionActions + +const initialState: FreebuffSessionState = { + session: null, + lastFetchError: null, +} + +export const useFreebuffSessionStore = create((set) => ({ + ...initialState, + setSession: (session) => set({ session, lastFetchError: null }), + setError: (lastFetchError) => set({ lastFetchError }), + reset: () => set(initialState), +})) + +/** Read the current instance id for outgoing chat requests. */ +export const getFreebuffInstanceId = (): string | undefined => { + const { session } = useFreebuffSessionStore.getState() + if (!session) return undefined + if (session.status === 'queued' || session.status === 'active') { + return session.instanceId + } + return undefined +} diff --git a/cli/src/types/freebuff-session.ts b/cli/src/types/freebuff-session.ts new file mode 100644 index 0000000000..d384825ad5 --- /dev/null +++ b/cli/src/types/freebuff-session.ts @@ -0,0 +1,33 @@ +/** + * Public shapes returned by the server at /api/v1/freebuff/session. + * Mirrors web/src/server/free-session/types.ts but duplicated here so the CLI + * doesn't need a cross-package import for a 20-line type. + */ +export type FreebuffSessionServerResponse = + | { status: 'disabled' } + | { status: 'none'; message?: string } + | { + status: 'queued' + instanceId: string + position: number + queueDepth: number + estimatedWaitMs: number + queuedAt: string + } + | { + status: 'active' + instanceId: string + admittedAt: string + expiresAt: string + remainingMs: number + } + +/** + * Client-only terminal state set when the server reports `session_superseded` + * on a chat request. Polling stops; UI tells the user to close the other CLI. + */ +export type FreebuffSessionResponse = + | FreebuffSessionServerResponse + | { status: 'superseded' } + +export type FreebuffSessionStatus = FreebuffSessionResponse['status'] diff --git a/cli/src/utils/create-run-config.ts b/cli/src/utils/create-run-config.ts index c68535d78d..1dab6a3ff0 100644 --- a/cli/src/utils/create-run-config.ts +++ b/cli/src/utils/create-run-config.ts @@ -26,6 +26,7 @@ export type CreateRunConfigParams = { eventHandlerState: EventHandlerState signal: AbortSignal costMode?: 'free' | 'normal' | 'max' | 'experimental' | 'ask' + extraCodebuffMetadata?: Record } const SENSITIVE_EXTENSIONS = new Set([ @@ -102,6 +103,7 @@ export const createRunConfig = (params: CreateRunConfigParams) => { agentDefinitions, eventHandlerState, costMode, + extraCodebuffMetadata, } = params return { @@ -116,6 +118,7 @@ export const createRunConfig = (params: CreateRunConfigParams) => { handleEvent: createEventHandler(eventHandlerState), signal: params.signal, costMode, + extraCodebuffMetadata, fileFilter: ((filePath: string) => { if (isSensitiveFile(filePath)) return { status: 'blocked' } if (isEnvTemplateFile(filePath)) return { status: 'allow-example' } diff --git a/cli/src/utils/error-handling.ts b/cli/src/utils/error-handling.ts index 1c6994ba7d..0ff8894825 100644 --- a/cli/src/utils/error-handling.ts +++ b/cli/src/utils/error-handling.ts @@ -57,6 +57,40 @@ export const isFreeModeUnavailableError = (error: unknown): boolean => { return false } +/** + * Freebuff waiting-room gate errors returned by /api/v1/chat/completions. + * + * Contract (see docs/freebuff-waiting-room.md): + * - 428 `waiting_room_required` — no session row exists; POST /session to join. + * - 429 `waiting_room_queued` — row exists but still queued. + * - 409 `session_superseded` — another CLI rotated our instance id. + * - 410 `session_expired` — active session's expires_at has passed. + */ +export type FreebuffGateErrorKind = + | 'waiting_room_required' + | 'waiting_room_queued' + | 'session_superseded' + | 'session_expired' + +const FREEBUFF_GATE_STATUS: Record = { + waiting_room_required: 428, + waiting_room_queued: 429, + session_superseded: 409, + session_expired: 410, +} + +export const getFreebuffGateErrorKind = ( + error: unknown, +): FreebuffGateErrorKind | null => { + if (!error || typeof error !== 'object') return null + const errorCode = (error as { error?: unknown }).error + const statusCode = (error as { statusCode?: unknown }).statusCode + if (typeof errorCode !== 'string') return null + const expected = FREEBUFF_GATE_STATUS[errorCode as FreebuffGateErrorKind] + if (expected === undefined || statusCode !== expected) return null + return errorCode as FreebuffGateErrorKind +} + export const OUT_OF_CREDITS_MESSAGE = `Out of credits. Please add credits at ${defaultAppUrl}/usage` export const FREE_MODE_UNAVAILABLE_MESSAGE = IS_FREEBUFF diff --git a/common/src/types/contracts/llm.ts b/common/src/types/contracts/llm.ts index 44e8f4d4e3..11c5a5ba0c 100644 --- a/common/src/types/contracts/llm.ts +++ b/common/src/types/contracts/llm.ts @@ -62,6 +62,10 @@ export type PromptAiSdkStreamFn = ( localAgentTemplates?: Record /** Cost mode - 'free' mode means 0 credits charged for all agents */ costMode?: string + /** Extra key/values merged into the request's `codebuff_metadata` field. + * Used to forward client-scoped identifiers (e.g. `freebuff_instance_id`) + * that server-side gates read from the chat-completions body. */ + extraCodebuffMetadata?: Record sendAction: SendActionFn logger: Logger trackEvent: TrackEventFn diff --git a/packages/agent-runtime/src/prompt-agent-stream.ts b/packages/agent-runtime/src/prompt-agent-stream.ts index 386af6af2a..c3ce83d15d 100644 --- a/packages/agent-runtime/src/prompt-agent-stream.ts +++ b/packages/agent-runtime/src/prompt-agent-stream.ts @@ -15,6 +15,7 @@ export const getAgentStreamFromTemplate = (params: { apiKey: string clientSessionId: string costMode?: string + extraCodebuffMetadata?: Record fingerprintId: string includeCacheControl?: boolean localAgentTemplates: Record @@ -44,6 +45,7 @@ export const getAgentStreamFromTemplate = (params: { apiKey, clientSessionId, costMode, + extraCodebuffMetadata, fingerprintId, includeCacheControl, localAgentTemplates, @@ -75,6 +77,7 @@ export const getAgentStreamFromTemplate = (params: { apiKey, clientSessionId, costMode, + extraCodebuffMetadata, fingerprintId, includeCacheControl, logger, diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts index 0f6c3884b6..879422d9cd 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts @@ -40,6 +40,7 @@ export type SubagentContextParams = AgentRuntimeDeps & AgentRuntimeScopedDeps & { clientSessionId: string costMode?: string + extraCodebuffMetadata?: Record fileContext: ProjectFileContext localAgentTemplates: Record repoId: string | undefined @@ -93,6 +94,7 @@ export function extractSubagentContextParams( // Core context params clientSessionId: params.clientSessionId, costMode: params.costMode, + extraCodebuffMetadata: params.extraCodebuffMetadata, fileContext: params.fileContext, localAgentTemplates: params.localAgentTemplates, repoId: params.repoId, diff --git a/packages/internal/src/db/migrations/meta/_journal.json b/packages/internal/src/db/migrations/meta/_journal.json index a8183fcf3e..1370866594 100644 --- a/packages/internal/src/db/migrations/meta/_journal.json +++ b/packages/internal/src/db/migrations/meta/_journal.json @@ -302,6 +302,13 @@ "when": 1773878149145, "tag": "0042_needy_jack_murdock", "breakpoints": true + }, + { + "idx": 43, + "version": "7", + "when": 1776461642346, + "tag": "0043_vengeful_boomer", + "breakpoints": true } ] } \ No newline at end of file diff --git a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts index 0033314f00..cd7762eee1 100644 --- a/packages/internal/src/db/schema.ts +++ b/packages/internal/src/db/schema.ts @@ -795,3 +795,65 @@ export const agentStep = pgTable( index('idx_agent_step_children_gin').using('gin', table.child_run_ids), ], ) + +export const freeSessionStatusEnum = pgEnum('free_session_status', [ + 'queued', + 'active', +]) + +/** + * Free-user session / waiting-room state. One row per user is enforced by the + * PK on user_id so a single account cannot occupy multiple active sessions. + * + * Status transitions: + * none → (POST /session) → queued + * queued → (admission tick) → active + * active → (expires_at in past) → treated as expired; next POST re-queues + * any → (DELETE /session) → row removed + * + * active_instance_id is server-generated on every POST /session and rotates + * when a new CLI takes over. Chat completions requires a matching + * active_instance_id so prior instances stop serving requests. + */ +export const freeSession = pgTable( + 'free_session', + { + user_id: text('user_id') + .primaryKey() + .references(() => user.id, { onDelete: 'cascade' }), + status: freeSessionStatusEnum('status').notNull(), + active_instance_id: text('active_instance_id').notNull(), + queued_at: timestamp('queued_at', { + mode: 'date', + withTimezone: true, + }) + .notNull() + .defaultNow(), + admitted_at: timestamp('admitted_at', { + mode: 'date', + withTimezone: true, + }), + expires_at: timestamp('expires_at', { + mode: 'date', + withTimezone: true, + }), + created_at: timestamp('created_at', { + mode: 'date', + withTimezone: true, + }) + .notNull() + .defaultNow(), + updated_at: timestamp('updated_at', { + mode: 'date', + withTimezone: true, + }) + .notNull() + .defaultNow(), + }, + (table) => [ + // Dequeue: SELECT ... WHERE status='queued' ORDER BY queued_at LIMIT N + index('idx_free_session_queue').on(table.status, table.queued_at), + // Expiry sweep: SELECT ... WHERE status='active' AND expires_at < now() + index('idx_free_session_expiry').on(table.expires_at), + ], +) diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index ee789a4d1d..13d934fb57 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -32,6 +32,17 @@ export const serverEnvSchema = clientEnvSchema.extend({ DISCORD_PUBLIC_KEY: z.string().min(1), DISCORD_BOT_TOKEN: z.string().min(1), DISCORD_APPLICATION_ID: z.string().min(1), + + // Freebuff waiting room. Defaults to OFF so the feature requires explicit + // opt-in per environment — the CLI/SDK do not yet send + // freebuff_instance_id, so enabling this before they ship would reject + // every free-mode request with 428 waiting_room_required. + FREEBUFF_WAITING_ROOM_ENABLED: z + .enum(['true', 'false']) + .default('false') + .transform((v) => v === 'true'), + FREEBUFF_SESSION_LENGTH_MS: z.coerce.number().int().positive().default(60 * 60 * 1000), + FREEBUFF_MAX_CONCURRENT_SESSIONS: z.coerce.number().int().positive().default(50), }) export const serverEnvVars = serverEnvSchema.keyof().options export type ServerEnvVar = (typeof serverEnvVars)[number] @@ -79,4 +90,9 @@ export const serverProcessEnv: ServerInput = { DISCORD_PUBLIC_KEY: process.env.DISCORD_PUBLIC_KEY, DISCORD_BOT_TOKEN: process.env.DISCORD_BOT_TOKEN, DISCORD_APPLICATION_ID: process.env.DISCORD_APPLICATION_ID, + + // Freebuff waiting room + FREEBUFF_WAITING_ROOM_ENABLED: process.env.FREEBUFF_WAITING_ROOM_ENABLED, + FREEBUFF_SESSION_LENGTH_MS: process.env.FREEBUFF_SESSION_LENGTH_MS, + FREEBUFF_MAX_CONCURRENT_SESSIONS: process.env.FREEBUFF_MAX_CONCURRENT_SESSIONS, } diff --git a/sdk/src/impl/__tests__/provider-options-metadata.test.ts b/sdk/src/impl/__tests__/provider-options-metadata.test.ts new file mode 100644 index 0000000000..908ce5446f --- /dev/null +++ b/sdk/src/impl/__tests__/provider-options-metadata.test.ts @@ -0,0 +1,72 @@ +import { describe, expect, it } from 'bun:test' + +import { getProviderOptions } from '../llm' + +describe('getProviderOptions — codebuff_metadata', () => { + const baseParams = { + model: 'openrouter/anthropic/claude-sonnet-4-5', + runId: 'run-1', + clientSessionId: 'session-1', + } + + it('includes run_id and client_id in codebuff_metadata', () => { + const opts = getProviderOptions(baseParams) + const meta = (opts.codebuff as any).codebuff_metadata + expect(meta).toMatchObject({ + run_id: 'run-1', + client_id: 'session-1', + }) + }) + + it('merges extraCodebuffMetadata into codebuff_metadata', () => { + const opts = getProviderOptions({ + ...baseParams, + extraCodebuffMetadata: { freebuff_instance_id: 'abc-123' }, + }) + const meta = (opts.codebuff as any).codebuff_metadata + expect(meta).toMatchObject({ + run_id: 'run-1', + client_id: 'session-1', + freebuff_instance_id: 'abc-123', + }) + }) + + it('omits extra keys when extraCodebuffMetadata is undefined', () => { + const opts = getProviderOptions(baseParams) + const meta = (opts.codebuff as any).codebuff_metadata + expect(Object.keys(meta)).toEqual( + expect.arrayContaining(['run_id', 'client_id']), + ) + expect(meta.freebuff_instance_id).toBeUndefined() + }) + + it('cost_mode passes through alongside extra metadata', () => { + const opts = getProviderOptions({ + ...baseParams, + costMode: 'free', + extraCodebuffMetadata: { freebuff_instance_id: 'uuid-xyz' }, + }) + const meta = (opts.codebuff as any).codebuff_metadata + expect(meta).toMatchObject({ + cost_mode: 'free', + freebuff_instance_id: 'uuid-xyz', + }) + }) + + it('extraCodebuffMetadata does not overwrite reserved keys', () => { + const opts = getProviderOptions({ + ...baseParams, + costMode: 'free', + extraCodebuffMetadata: { + // These are intentionally the same keys the function already sets — + // make sure a misuse doesn't let callers override server-trusted + // identifiers. The spread currently puts caller keys last, which + // means it WOULD override. If that's ever intentional, change this + // test; for now, lock it down. + run_id: 'evil-override', + }, + }) + const meta = (opts.codebuff as any).codebuff_metadata + expect(meta.run_id).toBe('run-1') + }) +}) diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 8fc68f24c9..21cf1c59c5 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -62,7 +62,7 @@ function calculateUsedCredits(params: { costDollars: number }): number { return Math.round(costDollars * (1 + PROFIT_MARGIN) * 100) } -function getProviderOptions(params: { +export function getProviderOptions(params: { model: string runId: string clientSessionId: string @@ -71,6 +71,7 @@ function getProviderOptions(params: { n?: number costMode?: string cacheDebugCorrelation?: string + extraCodebuffMetadata?: Record }): { codebuff: JSONObject } { const { model, @@ -81,6 +82,7 @@ function getProviderOptions(params: { n, costMode, cacheDebugCorrelation, + extraCodebuffMetadata, } = params let providerConfig: Record @@ -105,6 +107,9 @@ function getProviderOptions(params: { ...providerOptions?.codebuff, // All values here get appended to the request body codebuff_metadata: { + // Caller-supplied keys go first so they can't override reserved + // identifiers like run_id/client_id/cost_mode that the server trusts. + ...(extraCodebuffMetadata ?? {}), run_id: runId, client_id: clientSessionId, ...(n && { n }), diff --git a/sdk/src/run.ts b/sdk/src/run.ts index 57b42ffbd3..5a18f7025c 100644 --- a/sdk/src/run.ts +++ b/sdk/src/run.ts @@ -147,6 +147,10 @@ export type RunOptions = { extraToolResults?: ToolMessage[] signal?: AbortSignal costMode?: string + /** Extra key/values merged into each LLM request's `codebuff_metadata`. + * Used by hosts (e.g. the CLI) to forward client-scoped identifiers like + * `freebuff_instance_id` that server-side gates read from the request body. */ + extraCodebuffMetadata?: Record } const createAbortError = (signal?: AbortSignal) => { @@ -213,6 +217,7 @@ async function runOnce({ extraToolResults, signal, costMode, + extraCodebuffMetadata, }: RunExecutionOptions): Promise { const fsSourceValue = typeof fsSource === 'function' ? fsSource() : fsSource const fs = await fsSourceValue @@ -509,6 +514,7 @@ async function runOnce({ repoId: undefined, clientSessionId: promptId, userId, + extraCodebuffMetadata, signal: signal ?? new AbortController().signal, }).catch((error) => { let errorMessage = From fa1f8f819f159281d3396573816cabb5da36ae92 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Fri, 17 Apr 2026 19:17:16 -0700 Subject: [PATCH 03/31] Fix cli test --- bunfig.toml | 2 +- .../hooks/helpers/__tests__/send-message.test.ts | 6 +++++- cli/tsconfig.json | 1 + test/setup-scm-loader.ts | 15 +++++++++++++++ 4 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 test/setup-scm-loader.ts diff --git a/bunfig.toml b/bunfig.toml index 7068677e56..b794ad0991 100644 --- a/bunfig.toml +++ b/bunfig.toml @@ -7,4 +7,4 @@ linkWorkspacePackages = true [test] # Exclude test repositories, integration tests, and Playwright e2e tests from test execution by default exclude = ["evals/test-repos/**", "**/*.integration.test.*", "web/src/__tests__/e2e/**"] -preload = ["./sdk/test/setup-env.ts", "./test/setup-bigquery-mocks.ts", "./web/test/setup-globals.ts"] +preload = ["./test/setup-scm-loader.ts", "./sdk/test/setup-env.ts", "./test/setup-bigquery-mocks.ts", "./web/test/setup-globals.ts"] diff --git a/cli/src/hooks/helpers/__tests__/send-message.test.ts b/cli/src/hooks/helpers/__tests__/send-message.test.ts index 9ffe1fdc74..4f36bab721 100644 --- a/cli/src/hooks/helpers/__tests__/send-message.test.ts +++ b/cli/src/hooks/helpers/__tests__/send-message.test.ts @@ -1635,8 +1635,12 @@ describe('freebuff gate errors', () => { // the structured gate responses). const messages = baseMessage() const updater = makeUpdater(messages) + const err = Object.assign(new Error('oops'), { + error: 'session_superseded', + statusCode: 500, + }) handleRunError({ - error: { error: 'session_superseded', statusCode: 500, message: 'oops' }, + error: err, timerController: createMockTimerController(), updater, setIsRetrying: () => {}, diff --git a/cli/tsconfig.json b/cli/tsconfig.json index d4b7a92834..127c0f0f1c 100644 --- a/cli/tsconfig.json +++ b/cli/tsconfig.json @@ -12,6 +12,7 @@ "esModuleInterop": true, "skipLibCheck": true, "preserveSymlinks": false, + "baseUrl": ".", "paths": { "@codebuff/sdk": ["../sdk/src/index.ts"] } diff --git a/test/setup-scm-loader.ts b/test/setup-scm-loader.ts new file mode 100644 index 0000000000..6acafba756 --- /dev/null +++ b/test/setup-scm-loader.ts @@ -0,0 +1,15 @@ +import { plugin } from 'bun' +import { readFile } from 'fs/promises' + +plugin({ + name: 'scm-text-loader', + setup(build) { + build.onLoad({ filter: /\.scm$/ }, async (args) => { + const text = await readFile(args.path, 'utf8') + return { + exports: { default: text }, + loader: 'object', + } + }) + }, +}) From 2d9f08160f0d1a2c76fe19e380a2779d60231dd2 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 00:11:44 -0700 Subject: [PATCH 04/31] 10 minutes before cache clears in free mode --- agents/base2/base2.ts | 50 +++++++++++++++++++++++++++------------- agents/context-pruner.ts | 18 +++++++++++++-- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index c20359d14c..3bd7956260 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -284,22 +284,40 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} noAskUser, }), - handleSteps: function* ({ params }) { - while (true) { - // Run context-pruner before each step - yield { - toolName: 'spawn_agent_inline', - input: { - agent_type: 'context-pruner', - params: params ?? {}, - }, - includeToolCall: false, - } as any - - const { stepsComplete } = yield 'STEP' - if (stepsComplete) break - } - }, + // handleSteps is serialized via .toString() and re-eval'd, so closure + // variables like `isFree` are not in scope at runtime. Pick the right + // literal-baked function here instead. + handleSteps: isFree + ? function* ({ params }) { + while (true) { + yield { + toolName: 'spawn_agent_inline', + input: { + agent_type: 'context-pruner', + params: { ...(params ?? {}), cacheExpiryMs: 10 * 60 * 1000 }, + }, + includeToolCall: false, + } as any + + const { stepsComplete } = yield 'STEP' + if (stepsComplete) break + } + } + : function* ({ params }) { + while (true) { + yield { + toolName: 'spawn_agent_inline', + input: { + agent_type: 'context-pruner', + params: params ?? {}, + }, + includeToolCall: false, + } as any + + const { stepsComplete } = yield 'STEP' + if (stepsComplete) break + } + }, } } diff --git a/agents/context-pruner.ts b/agents/context-pruner.ts index fd98630d3a..804f3cebb5 100644 --- a/agents/context-pruner.ts +++ b/agents/context-pruner.ts @@ -31,6 +31,9 @@ const definition: AgentDefinition = { userBudget: { type: 'number', }, + cacheExpiryMs: { + type: 'number', + }, }, required: [], }, @@ -74,8 +77,8 @@ const definition: AgentDefinition = { /** Fudge factor for token count threshold to trigger pruning earlier */ const TOKEN_COUNT_FUDGE_FACTOR = 1_000 - /** Prompt cache expiry time (Anthropic caches for 5 minutes) */ - const CACHE_EXPIRY_MS = 5 * 60 * 1000 + /** Prompt cache expiry time (Anthropic caches for 5 minutes by default) */ + const CACHE_EXPIRY_MS: number = params?.cacheExpiryMs ?? 5 * 60 * 1000 /** Header used in conversation summaries */ const SUMMARY_HEADER = @@ -328,6 +331,17 @@ const definition: AgentDefinition = { currentMessages.splice(lastSubagentSpawnIndex, 1) } + // Also remove the params USER_PROMPT if params were provided to this agent + // (this is the message like {"cacheExpiryMs": 600000}) + if (params && Object.keys(params).length > 0) { + const lastUserPromptIndex = currentMessages.findLastIndex((message) => + message.tags?.includes('USER_PROMPT'), + ) + if (lastUserPromptIndex !== -1) { + currentMessages.splice(lastUserPromptIndex, 1) + } + } + // Check for prompt cache miss (>5 min gap before the USER_PROMPT message) // The USER_PROMPT is the actual user message; INSTRUCTIONS_PROMPT comes after it // We need to find the USER_PROMPT and check the gap between it and the last assistant message From 90a95809fb3bb7fdb3b3f066db2ba09452323572 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 00:13:43 -0700 Subject: [PATCH 05/31] Remove thinker-with-files-gemini from freebuff --- agents/base2/base2.ts | 6 ------ common/src/constants/free-agents.ts | 3 --- common/src/tools/params/tool/spawn-agents.ts | 2 +- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 3bd7956260..c4b080d60e 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -88,7 +88,6 @@ export function createBase2( isFree && 'code-reviewer-lite', isDefault && 'code-reviewer', isMax && 'code-reviewer-multi-prompt', - isFree && 'thinker-with-files-gemini', 'thinker-gpt', 'context-pruner', ), @@ -144,7 +143,6 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u ${buildArray( '- Spawn context-gathering agents (file pickers, code searchers, and web/docs researchers) before making edits. Use the list_directory and glob tools directly for searching and exploring the codebase.', isFree && 'Do not spawn the thinker-gpt agent, unless the user asks. Not everyone has connected their ChatGPT subscription to Codebuff to allow for it.', - isFree && `Spawn the thinker-with-files-gemini agent for complex problems — it's very smart. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths since it has no conversation history.`, isDefault && '- Spawn the editor agent to implement the changes after you have gathered all the context you need.', (isDefault || isMax) && @@ -354,8 +352,6 @@ ${buildArray( 'After getting context on the user request from the codebase or from research, use the ask_user tool to ask the user for important clarifications on their request or alternate implementation strategies. You should skip this step if the choice is obvious -- only ask the user if you need their help making the best choice.', (isDefault || isMax || isFree) && `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`, - isFree && - `- For complex problems, spawn the thinker-with-files-gemini agent after gathering context. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths.`, (isDefault || isMax) && `- For quick problems, briefly explain your reasoning to the user. If you need to think longer, write your thoughts within the tags. Finally, for complex problems, spawn the thinker agent to help find the best solution. (gpt-5-agent is a last resort for complex problems)`, isDefault && @@ -400,8 +396,6 @@ function buildImplementationStepPrompt({ isMax && `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`, 'Consider loading relevant skills with the skill tool if they might help with the current task. Do not reload skills that were already loaded earlier in this conversation.', - isFree && - `Spawn the thinker-with-files-gemini agent for complex problems, not routine edits. Pass the relevant filePaths.`, isMax && `You must spawn the 'editor-multi-prompt' agent to implement code changes rather than using the str_replace or write_file tools, since it will generate the best code changes.`, (isDefault || isMax) && diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 551500f3f5..c285ba7c8d 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -37,9 +37,6 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Code reviewer for free mode 'code-reviewer-lite': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']), - - // Thinker for free mode - 'thinker-with-files-gemini': new Set(['google/gemini-3.1-pro-preview']), } /** diff --git a/common/src/tools/params/tool/spawn-agents.ts b/common/src/tools/params/tool/spawn-agents.ts index fe88beaa07..0ba3e9268f 100644 --- a/common/src/tools/params/tool/spawn-agents.ts +++ b/common/src/tools/params/tool/spawn-agents.ts @@ -34,7 +34,7 @@ const inputSchema = z cwd: z.string().optional().describe('Optional working directory relative to project root'), maxResults: z.number().optional().describe('Max results per file. Default 15'), })).optional().describe('Array of code search queries (code-searcher)'), - filePaths: z.array(z.string()).optional().describe('Relevant file paths to read (opus-agent, gpt-5-agent, thinker-with-files-gemini)'), + filePaths: z.array(z.string()).optional().describe('Relevant file paths to read (opus-agent, gpt-5-agent)'), directories: z.array(z.string()).optional().describe('Directories to search within (file-picker)'), url: z.string().optional().describe('Starting URL to navigate to (browser-use)'), prompts: z.array(z.string()).optional().describe('Array of strategy prompts (editor-multi-prompt, code-reviewer-multi-prompt)'), From c76c5a32487a7b55e65ad9799e67ec1faa85624c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 18 Apr 2026 07:18:42 +0000 Subject: [PATCH 06/31] Bump version to 1.0.642 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 1eb51b176f..efd5156709 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.641", + "version": "1.0.642", "description": "AI coding agent", "license": "MIT", "bin": { From 35d07753fe614346758999cc5164d9697425f7fb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 18 Apr 2026 07:18:53 +0000 Subject: [PATCH 07/31] Bump Freebuff version to 0.0.34 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index dc00bf86cd..50a6b6b395 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.33", + "version": "0.0.34", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 0f261bf0c34209717525d376174892a8685cb096 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 00:23:09 -0700 Subject: [PATCH 08/31] Increase test timeout --- .../api/v1/chat/completions/__tests__/completions.test.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 0dddb5949e..4baee74992 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -805,6 +805,10 @@ describe('/api/v1/chat/completions POST endpoint', () => { }) describe('Subscription limit enforcement', () => { + // Bumped from Bun's 5s default: the non-streaming fetch-path tests here + // have flaked right at the boundary (observed 5001ms) on loaded machines. + const SUBSCRIPTION_TEST_TIMEOUT_MS = 15000 + const createValidRequest = () => new NextRequest('http://localhost:3000/api/v1/chat/completions', { method: 'POST', @@ -1050,7 +1054,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(response.status).toBe(200) // getUserPreferences should not be called for non-subscribers expect(mockGetUserPreferences).not.toHaveBeenCalled() - }) + }, SUBSCRIPTION_TEST_TIMEOUT_MS) it.skip('defaults to allowing fallback when getUserPreferences is not provided', async () => { const weeklyLimitError: BlockGrantResult = { @@ -1077,7 +1081,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { // Should continue processing (default to allowing a-la-carte) expect(response.status).toBe(200) - }) + }, SUBSCRIPTION_TEST_TIMEOUT_MS) it('allows subscriber with 0 a-la-carte credits but active block grant', async () => { const blockGrant: BlockGrantResult = { From 9f8de8a71ce72cd2e589b7a4a4abe356a5af0186 Mon Sep 17 00:00:00 2001 From: Shangxin Date: Sat, 18 Apr 2026 15:25:48 +0800 Subject: [PATCH 09/31] fix: avoid DNS lookup after proxied release CONNECT (#506) --- cli/release-staging/http.js | 176 +++++++++++++ cli/release-staging/index.js | 125 +-------- cli/release-staging/package.json | 1 + cli/release/http.js | 176 +++++++++++++ cli/release/index.js | 125 +-------- cli/release/package.json | 1 + .../__tests__/release/proxy-http-get.test.ts | 237 ++++++++++++++++++ freebuff/cli/release/http.js | 176 +++++++++++++ freebuff/cli/release/index.js | 125 +-------- freebuff/cli/release/package.json | 1 + 10 files changed, 786 insertions(+), 357 deletions(-) create mode 100644 cli/release-staging/http.js create mode 100644 cli/release/http.js create mode 100644 cli/src/__tests__/release/proxy-http-get.test.ts create mode 100644 freebuff/cli/release/http.js diff --git a/cli/release-staging/http.js b/cli/release-staging/http.js new file mode 100644 index 0000000000..3419e80ca3 --- /dev/null +++ b/cli/release-staging/http.js @@ -0,0 +1,176 @@ +const http = require('http') +const https = require('https') +const tls = require('tls') + +function createReleaseHttpClient({ + env = process.env, + userAgent, + requestTimeout, + httpModule = http, + httpsModule = https, + tlsModule = tls, +}) { + function getProxyUrl() { + return ( + env.HTTPS_PROXY || + env.https_proxy || + env.HTTP_PROXY || + env.http_proxy || + null + ) + } + + function shouldBypassProxy(hostname) { + const noProxy = env.NO_PROXY || env.no_proxy || '' + if (!noProxy) return false + + const domains = noProxy + .split(',') + .map((domain) => domain.trim().toLowerCase().replace(/:\d+$/, '')) + const host = hostname.toLowerCase() + + return domains.some((domain) => { + if (domain === '*') return true + if (domain.startsWith('.')) { + return host.endsWith(domain) || host === domain.slice(1) + } + return host === domain || host.endsWith(`.${domain}`) + }) + } + + function connectThroughProxy(proxyUrl, targetHost, targetPort) { + return new Promise((resolve, reject) => { + const proxy = new URL(proxyUrl) + const isHttpsProxy = proxy.protocol === 'https:' + const connectOptions = { + hostname: proxy.hostname, + port: proxy.port || (isHttpsProxy ? 443 : 80), + method: 'CONNECT', + path: `${targetHost}:${targetPort}`, + headers: { + Host: `${targetHost}:${targetPort}`, + }, + } + + if (proxy.username || proxy.password) { + const auth = Buffer.from( + `${decodeURIComponent(proxy.username || '')}:${decodeURIComponent( + proxy.password || '', + )}`, + ).toString('base64') + connectOptions.headers['Proxy-Authorization'] = `Basic ${auth}` + } + + const transport = isHttpsProxy ? httpsModule : httpModule + const req = transport.request(connectOptions) + + req.on('connect', (res, socket) => { + if (res.statusCode === 200) { + resolve(socket) + return + } + + socket.destroy() + reject(new Error(`Proxy CONNECT failed with status ${res.statusCode}`)) + }) + + req.on('error', (error) => { + reject(new Error(`Proxy connection failed: ${error.message}`)) + }) + + req.setTimeout(requestTimeout, () => { + req.destroy() + reject(new Error('Proxy connection timeout.')) + }) + + req.end() + }) + } + + async function buildRequestOptions(url, options = {}) { + const parsedUrl = new URL(url) + const reqOptions = { + hostname: parsedUrl.hostname, + port: parsedUrl.port || 443, + path: parsedUrl.pathname + parsedUrl.search, + headers: { + 'User-Agent': userAgent, + ...options.headers, + }, + } + + const proxyUrl = getProxyUrl() + if (!proxyUrl || shouldBypassProxy(parsedUrl.hostname)) { + return reqOptions + } + + const tunnelSocket = await connectThroughProxy( + proxyUrl, + parsedUrl.hostname, + parsedUrl.port || 443, + ) + + class TunnelAgent extends httpsModule.Agent { + createConnection(_options, callback) { + const secureSocket = tlsModule.connect({ + socket: tunnelSocket, + servername: parsedUrl.hostname, + }) + + if (typeof callback === 'function') { + if (typeof secureSocket.once === 'function') { + let settled = false + const finish = (error) => { + if (settled) return + settled = true + callback(error || null, error ? undefined : secureSocket) + } + + secureSocket.once('secureConnect', () => finish(null)) + secureSocket.once('error', (error) => finish(error)) + } else { + callback(null, secureSocket) + } + } + + return secureSocket + } + } + + reqOptions.agent = new TunnelAgent({ keepAlive: false }) + return reqOptions + } + + async function httpGet(url, options = {}) { + const reqOptions = await buildRequestOptions(url, options) + + return new Promise((resolve, reject) => { + const req = httpsModule.get(reqOptions, (res) => { + if (res.statusCode === 301 || res.statusCode === 302) { + res.resume() + httpGet(new URL(res.headers.location, url).href, options) + .then(resolve) + .catch(reject) + return + } + + resolve(res) + }) + + req.on('error', reject) + req.setTimeout(options.timeout || requestTimeout, () => { + req.destroy() + reject(new Error('Request timeout.')) + }) + }) + } + + return { + getProxyUrl, + httpGet, + } +} + +module.exports = { + createReleaseHttpClient, +} diff --git a/cli/release-staging/index.js b/cli/release-staging/index.js index 14f229fb4c..083e8879a9 100644 --- a/cli/release-staging/index.js +++ b/cli/release-staging/index.js @@ -6,10 +6,10 @@ const http = require('http') const https = require('https') const os = require('os') const path = require('path') -const tls = require('tls') const zlib = require('zlib') const tar = require('tar') +const { createReleaseHttpClient } = require('./http') const packageName = 'codecane' @@ -66,6 +66,11 @@ function createConfig(packageName) { } const CONFIG = createConfig(packageName) +const { getProxyUrl, httpGet } = createReleaseHttpClient({ + env: process.env, + userAgent: CONFIG.userAgent, + requestTimeout: CONFIG.requestTimeout, +}) function getPostHogConfig() { const apiKey = @@ -131,76 +136,6 @@ function trackUpdateFailed(errorMessage, version, context = {}) { } } -function getProxyUrl() { - return ( - process.env.HTTPS_PROXY || - process.env.https_proxy || - process.env.HTTP_PROXY || - process.env.http_proxy || - null - ) -} - -function shouldBypassProxy(hostname) { - const noProxy = process.env.NO_PROXY || process.env.no_proxy || '' - if (!noProxy) return false - const domains = noProxy.split(',').map((d) => d.trim().toLowerCase().replace(/:\d+$/, '')) - const host = hostname.toLowerCase() - return domains.some((d) => { - if (d === '*') return true - if (d.startsWith('.')) return host.endsWith(d) || host === d.slice(1) - return host === d || host.endsWith('.' + d) - }) -} - -function connectThroughProxy(proxyUrl, targetHost, targetPort) { - return new Promise((resolve, reject) => { - const proxy = new URL(proxyUrl) - const isHttpsProxy = proxy.protocol === 'https:' - const connectOptions = { - hostname: proxy.hostname, - port: proxy.port || (isHttpsProxy ? 443 : 80), - method: 'CONNECT', - path: `${targetHost}:${targetPort}`, - headers: { - Host: `${targetHost}:${targetPort}`, - }, - } - - if (proxy.username || proxy.password) { - const auth = Buffer.from( - `${decodeURIComponent(proxy.username || '')}:${decodeURIComponent(proxy.password || '')}`, - ).toString('base64') - connectOptions.headers['Proxy-Authorization'] = `Basic ${auth}` - } - - const transport = isHttpsProxy ? https : http - const req = transport.request(connectOptions) - - req.on('connect', (res, socket) => { - if (res.statusCode === 200) { - resolve(socket) - } else { - socket.destroy() - reject( - new Error(`Proxy CONNECT failed with status ${res.statusCode}`), - ) - } - }) - - req.on('error', (err) => { - reject(new Error(`Proxy connection failed: ${err.message}`)) - }) - - req.setTimeout(CONFIG.requestTimeout, () => { - req.destroy() - reject(new Error('Proxy connection timeout.')) - }) - - req.end() - }) -} - const PLATFORM_TARGETS = { 'linux-x64': `${packageName}-linux-x64.tar.gz`, 'linux-arm64': `${packageName}-linux-arm64.tar.gz`, @@ -225,54 +160,6 @@ const term = { }, } -async function httpGet(url, options = {}) { - const parsedUrl = new URL(url) - const proxyUrl = getProxyUrl() - - const reqOptions = { - hostname: parsedUrl.hostname, - path: parsedUrl.pathname + parsedUrl.search, - headers: { - 'User-Agent': CONFIG.userAgent, - ...options.headers, - }, - } - - if (proxyUrl && !shouldBypassProxy(parsedUrl.hostname)) { - const tunnelSocket = await connectThroughProxy( - proxyUrl, - parsedUrl.hostname, - parsedUrl.port || 443, - ) - reqOptions.agent = false - reqOptions.createConnection = () => - tls.connect({ - socket: tunnelSocket, - servername: parsedUrl.hostname, - }) - } - - return new Promise((resolve, reject) => { - const req = https.get(reqOptions, (res) => { - if (res.statusCode === 302 || res.statusCode === 301) { - res.resume() - return httpGet(new URL(res.headers.location, url).href, options) - .then(resolve) - .catch(reject) - } - resolve(res) - }) - - req.on('error', reject) - - const timeout = options.timeout || CONFIG.requestTimeout - req.setTimeout(timeout, () => { - req.destroy() - reject(new Error('Request timeout.')) - }) - }) -} - async function getLatestVersion() { try { const res = await httpGet( diff --git a/cli/release-staging/package.json b/cli/release-staging/package.json index 23ae8cac37..f84bff8721 100644 --- a/cli/release-staging/package.json +++ b/cli/release-staging/package.json @@ -12,6 +12,7 @@ }, "files": [ "index.js", + "http.js", "postinstall.js", "README.md" ], diff --git a/cli/release/http.js b/cli/release/http.js new file mode 100644 index 0000000000..3419e80ca3 --- /dev/null +++ b/cli/release/http.js @@ -0,0 +1,176 @@ +const http = require('http') +const https = require('https') +const tls = require('tls') + +function createReleaseHttpClient({ + env = process.env, + userAgent, + requestTimeout, + httpModule = http, + httpsModule = https, + tlsModule = tls, +}) { + function getProxyUrl() { + return ( + env.HTTPS_PROXY || + env.https_proxy || + env.HTTP_PROXY || + env.http_proxy || + null + ) + } + + function shouldBypassProxy(hostname) { + const noProxy = env.NO_PROXY || env.no_proxy || '' + if (!noProxy) return false + + const domains = noProxy + .split(',') + .map((domain) => domain.trim().toLowerCase().replace(/:\d+$/, '')) + const host = hostname.toLowerCase() + + return domains.some((domain) => { + if (domain === '*') return true + if (domain.startsWith('.')) { + return host.endsWith(domain) || host === domain.slice(1) + } + return host === domain || host.endsWith(`.${domain}`) + }) + } + + function connectThroughProxy(proxyUrl, targetHost, targetPort) { + return new Promise((resolve, reject) => { + const proxy = new URL(proxyUrl) + const isHttpsProxy = proxy.protocol === 'https:' + const connectOptions = { + hostname: proxy.hostname, + port: proxy.port || (isHttpsProxy ? 443 : 80), + method: 'CONNECT', + path: `${targetHost}:${targetPort}`, + headers: { + Host: `${targetHost}:${targetPort}`, + }, + } + + if (proxy.username || proxy.password) { + const auth = Buffer.from( + `${decodeURIComponent(proxy.username || '')}:${decodeURIComponent( + proxy.password || '', + )}`, + ).toString('base64') + connectOptions.headers['Proxy-Authorization'] = `Basic ${auth}` + } + + const transport = isHttpsProxy ? httpsModule : httpModule + const req = transport.request(connectOptions) + + req.on('connect', (res, socket) => { + if (res.statusCode === 200) { + resolve(socket) + return + } + + socket.destroy() + reject(new Error(`Proxy CONNECT failed with status ${res.statusCode}`)) + }) + + req.on('error', (error) => { + reject(new Error(`Proxy connection failed: ${error.message}`)) + }) + + req.setTimeout(requestTimeout, () => { + req.destroy() + reject(new Error('Proxy connection timeout.')) + }) + + req.end() + }) + } + + async function buildRequestOptions(url, options = {}) { + const parsedUrl = new URL(url) + const reqOptions = { + hostname: parsedUrl.hostname, + port: parsedUrl.port || 443, + path: parsedUrl.pathname + parsedUrl.search, + headers: { + 'User-Agent': userAgent, + ...options.headers, + }, + } + + const proxyUrl = getProxyUrl() + if (!proxyUrl || shouldBypassProxy(parsedUrl.hostname)) { + return reqOptions + } + + const tunnelSocket = await connectThroughProxy( + proxyUrl, + parsedUrl.hostname, + parsedUrl.port || 443, + ) + + class TunnelAgent extends httpsModule.Agent { + createConnection(_options, callback) { + const secureSocket = tlsModule.connect({ + socket: tunnelSocket, + servername: parsedUrl.hostname, + }) + + if (typeof callback === 'function') { + if (typeof secureSocket.once === 'function') { + let settled = false + const finish = (error) => { + if (settled) return + settled = true + callback(error || null, error ? undefined : secureSocket) + } + + secureSocket.once('secureConnect', () => finish(null)) + secureSocket.once('error', (error) => finish(error)) + } else { + callback(null, secureSocket) + } + } + + return secureSocket + } + } + + reqOptions.agent = new TunnelAgent({ keepAlive: false }) + return reqOptions + } + + async function httpGet(url, options = {}) { + const reqOptions = await buildRequestOptions(url, options) + + return new Promise((resolve, reject) => { + const req = httpsModule.get(reqOptions, (res) => { + if (res.statusCode === 301 || res.statusCode === 302) { + res.resume() + httpGet(new URL(res.headers.location, url).href, options) + .then(resolve) + .catch(reject) + return + } + + resolve(res) + }) + + req.on('error', reject) + req.setTimeout(options.timeout || requestTimeout, () => { + req.destroy() + reject(new Error('Request timeout.')) + }) + }) + } + + return { + getProxyUrl, + httpGet, + } +} + +module.exports = { + createReleaseHttpClient, +} diff --git a/cli/release/index.js b/cli/release/index.js index 3d22e65739..85c60ff392 100644 --- a/cli/release/index.js +++ b/cli/release/index.js @@ -6,10 +6,10 @@ const http = require('http') const https = require('https') const os = require('os') const path = require('path') -const tls = require('tls') const zlib = require('zlib') const tar = require('tar') +const { createReleaseHttpClient } = require('./http') const packageName = 'codebuff' @@ -66,6 +66,11 @@ function createConfig(packageName) { } const CONFIG = createConfig(packageName) +const { getProxyUrl, httpGet } = createReleaseHttpClient({ + env: process.env, + userAgent: CONFIG.userAgent, + requestTimeout: CONFIG.requestTimeout, +}) function getPostHogConfig() { const apiKey = @@ -130,76 +135,6 @@ function trackUpdateFailed(errorMessage, version, context = {}) { } } -function getProxyUrl() { - return ( - process.env.HTTPS_PROXY || - process.env.https_proxy || - process.env.HTTP_PROXY || - process.env.http_proxy || - null - ) -} - -function shouldBypassProxy(hostname) { - const noProxy = process.env.NO_PROXY || process.env.no_proxy || '' - if (!noProxy) return false - const domains = noProxy.split(',').map((d) => d.trim().toLowerCase().replace(/:\d+$/, '')) - const host = hostname.toLowerCase() - return domains.some((d) => { - if (d === '*') return true - if (d.startsWith('.')) return host.endsWith(d) || host === d.slice(1) - return host === d || host.endsWith('.' + d) - }) -} - -function connectThroughProxy(proxyUrl, targetHost, targetPort) { - return new Promise((resolve, reject) => { - const proxy = new URL(proxyUrl) - const isHttpsProxy = proxy.protocol === 'https:' - const connectOptions = { - hostname: proxy.hostname, - port: proxy.port || (isHttpsProxy ? 443 : 80), - method: 'CONNECT', - path: `${targetHost}:${targetPort}`, - headers: { - Host: `${targetHost}:${targetPort}`, - }, - } - - if (proxy.username || proxy.password) { - const auth = Buffer.from( - `${decodeURIComponent(proxy.username || '')}:${decodeURIComponent(proxy.password || '')}`, - ).toString('base64') - connectOptions.headers['Proxy-Authorization'] = `Basic ${auth}` - } - - const transport = isHttpsProxy ? https : http - const req = transport.request(connectOptions) - - req.on('connect', (res, socket) => { - if (res.statusCode === 200) { - resolve(socket) - } else { - socket.destroy() - reject( - new Error(`Proxy CONNECT failed with status ${res.statusCode}`), - ) - } - }) - - req.on('error', (err) => { - reject(new Error(`Proxy connection failed: ${err.message}`)) - }) - - req.setTimeout(CONFIG.requestTimeout, () => { - req.destroy() - reject(new Error('Proxy connection timeout.')) - }) - - req.end() - }) -} - const PLATFORM_TARGETS = { 'linux-x64': `${packageName}-linux-x64.tar.gz`, 'linux-arm64': `${packageName}-linux-arm64.tar.gz`, @@ -224,54 +159,6 @@ const term = { }, } -async function httpGet(url, options = {}) { - const parsedUrl = new URL(url) - const proxyUrl = getProxyUrl() - - const reqOptions = { - hostname: parsedUrl.hostname, - path: parsedUrl.pathname + parsedUrl.search, - headers: { - 'User-Agent': CONFIG.userAgent, - ...options.headers, - }, - } - - if (proxyUrl && !shouldBypassProxy(parsedUrl.hostname)) { - const tunnelSocket = await connectThroughProxy( - proxyUrl, - parsedUrl.hostname, - parsedUrl.port || 443, - ) - reqOptions.agent = false - reqOptions.createConnection = () => - tls.connect({ - socket: tunnelSocket, - servername: parsedUrl.hostname, - }) - } - - return new Promise((resolve, reject) => { - const req = https.get(reqOptions, (res) => { - if (res.statusCode === 302 || res.statusCode === 301) { - res.resume() - return httpGet(new URL(res.headers.location, url).href, options) - .then(resolve) - .catch(reject) - } - resolve(res) - }) - - req.on('error', reject) - - const timeout = options.timeout || CONFIG.requestTimeout - req.setTimeout(timeout, () => { - req.destroy() - reject(new Error('Request timeout.')) - }) - }) -} - async function getLatestVersion() { try { const res = await httpGet( diff --git a/cli/release/package.json b/cli/release/package.json index efd5156709..a839a93a58 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -13,6 +13,7 @@ }, "files": [ "index.js", + "http.js", "postinstall.js", "README.md" ], diff --git a/cli/src/__tests__/release/proxy-http-get.test.ts b/cli/src/__tests__/release/proxy-http-get.test.ts new file mode 100644 index 0000000000..a0addd586a --- /dev/null +++ b/cli/src/__tests__/release/proxy-http-get.test.ts @@ -0,0 +1,237 @@ +import { describe, expect, test } from 'bun:test' +import { EventEmitter } from 'node:events' +import { createRequire } from 'node:module' +import { fileURLToPath } from 'node:url' +import { Readable } from 'node:stream' + +const require = createRequire(import.meta.url) + +const helperModules = [ + { + name: 'codebuff release helper', + path: fileURLToPath(new URL('../../../release/http.js', import.meta.url)), + }, + { + name: 'codebuff staging release helper', + path: fileURLToPath( + new URL('../../../release-staging/http.js', import.meta.url), + ), + }, + { + name: 'freebuff release helper', + path: fileURLToPath( + new URL('../../../../freebuff/cli/release/http.js', import.meta.url), + ), + }, +] + +function createResponse(statusCode: number, headers: Record, body = '') { + const response = Readable.from(body.length > 0 ? [body] : []) + return Object.assign(response, { + statusCode, + headers, + }) +} + +function createConnectRequest({ + statusCode = 200, + tunnelSocket, + recorder, +}: { + statusCode?: number + tunnelSocket: object + recorder: { timeoutCalls: number } +}) { + const emitter = new EventEmitter() + + return { + on(event: string, listener: (...args: any[]) => void) { + emitter.on(event, listener) + return this + }, + setTimeout() { + recorder.timeoutCalls += 1 + return this + }, + destroy() {}, + end() { + queueMicrotask(() => { + emitter.emit('connect', { statusCode }, tunnelSocket) + }) + }, + } +} + +for (const helperModule of helperModules) { + describe(helperModule.name, () => { + test('uses a tunnel agent instead of createConnection for proxied HTTPS requests', async () => { + const connectCalls: Array> = [] + const httpsGetCalls: Array> = [] + const tlsConnectCalls: Array> = [] + + const tunnelSocket = { kind: 'tunnel-socket' } + const tlsSocket = { kind: 'tls-socket' } + + const { createReleaseHttpClient } = require(helperModule.path) + + const client = createReleaseHttpClient({ + env: { + HTTPS_PROXY: 'http://proxy.internal:7890', + }, + userAgent: 'release-test-agent', + requestTimeout: 2500, + httpModule: { + request(options: Record) { + connectCalls.push(options) + return createConnectRequest({ + tunnelSocket, + recorder: { timeoutCalls: 0 }, + }) + }, + }, + httpsModule: { + Agent: class FakeAgent { + options: Record + + constructor(options: Record) { + this.options = options + } + }, + get(options: Record, callback: (response: Readable) => void) { + httpsGetCalls.push(options) + options.agent.createConnection(options) + queueMicrotask(() => { + callback(createResponse(200, {}, '{"version":"0.0.33"}')) + }) + return { + on() { + return this + }, + setTimeout() { + return this + }, + destroy() {}, + } + }, + }, + tlsModule: { + connect(options: Record) { + tlsConnectCalls.push(options) + return tlsSocket + }, + }, + }) + + const response = await client.httpGet( + 'https://registry.npmjs.org/freebuff/latest', + ) + response.resume() + + expect(connectCalls).toHaveLength(1) + expect(connectCalls[0]).toMatchObject({ + hostname: 'proxy.internal', + port: '7890', + method: 'CONNECT', + path: 'registry.npmjs.org:443', + headers: { + Host: 'registry.npmjs.org:443', + }, + }) + + expect(httpsGetCalls).toHaveLength(1) + expect(httpsGetCalls[0]?.createConnection).toBeUndefined() + expect(httpsGetCalls[0]?.agent).toBeDefined() + expect(httpsGetCalls[0]).toMatchObject({ + hostname: 'registry.npmjs.org', + path: '/freebuff/latest', + headers: { + 'User-Agent': 'release-test-agent', + }, + }) + + expect(tlsConnectCalls).toEqual([ + { + socket: tunnelSocket, + servername: 'registry.npmjs.org', + }, + ]) + }) + + test('reuses the same proxy strategy across redirects', async () => { + const httpsGetCalls: Array> = [] + + const { createReleaseHttpClient } = require(helperModule.path) + + let callCount = 0 + const client = createReleaseHttpClient({ + env: { + HTTPS_PROXY: 'http://proxy.internal:7890', + }, + userAgent: 'release-test-agent', + requestTimeout: 2500, + httpModule: { + request() { + return createConnectRequest({ + tunnelSocket: { kind: 'tunnel-socket' }, + recorder: { timeoutCalls: 0 }, + }) + }, + }, + httpsModule: { + Agent: class FakeAgent {}, + get(options: Record, callback: (response: Readable) => void) { + httpsGetCalls.push(options) + callCount += 1 + + queueMicrotask(() => { + if (callCount === 1) { + callback( + createResponse(302, { + location: '/redirected', + }), + ) + return + } + + callback(createResponse(200, {}, 'ok')) + }) + + return { + on() { + return this + }, + setTimeout() { + return this + }, + destroy() {}, + } + }, + }, + tlsModule: { + connect() { + return { kind: 'tls-socket' } + }, + }, + }) + + const response = await client.httpGet( + 'https://registry.npmjs.org/freebuff/latest', + ) + response.resume() + + expect(httpsGetCalls).toHaveLength(2) + expect(httpsGetCalls[0]).toMatchObject({ + hostname: 'registry.npmjs.org', + path: '/freebuff/latest', + }) + expect(httpsGetCalls[1]).toMatchObject({ + hostname: 'registry.npmjs.org', + path: '/redirected', + }) + expect(httpsGetCalls.every((call) => call.createConnection === undefined)).toBe( + true, + ) + expect(httpsGetCalls.every((call) => call.agent != null)).toBe(true) + }) + }) +} diff --git a/freebuff/cli/release/http.js b/freebuff/cli/release/http.js new file mode 100644 index 0000000000..3419e80ca3 --- /dev/null +++ b/freebuff/cli/release/http.js @@ -0,0 +1,176 @@ +const http = require('http') +const https = require('https') +const tls = require('tls') + +function createReleaseHttpClient({ + env = process.env, + userAgent, + requestTimeout, + httpModule = http, + httpsModule = https, + tlsModule = tls, +}) { + function getProxyUrl() { + return ( + env.HTTPS_PROXY || + env.https_proxy || + env.HTTP_PROXY || + env.http_proxy || + null + ) + } + + function shouldBypassProxy(hostname) { + const noProxy = env.NO_PROXY || env.no_proxy || '' + if (!noProxy) return false + + const domains = noProxy + .split(',') + .map((domain) => domain.trim().toLowerCase().replace(/:\d+$/, '')) + const host = hostname.toLowerCase() + + return domains.some((domain) => { + if (domain === '*') return true + if (domain.startsWith('.')) { + return host.endsWith(domain) || host === domain.slice(1) + } + return host === domain || host.endsWith(`.${domain}`) + }) + } + + function connectThroughProxy(proxyUrl, targetHost, targetPort) { + return new Promise((resolve, reject) => { + const proxy = new URL(proxyUrl) + const isHttpsProxy = proxy.protocol === 'https:' + const connectOptions = { + hostname: proxy.hostname, + port: proxy.port || (isHttpsProxy ? 443 : 80), + method: 'CONNECT', + path: `${targetHost}:${targetPort}`, + headers: { + Host: `${targetHost}:${targetPort}`, + }, + } + + if (proxy.username || proxy.password) { + const auth = Buffer.from( + `${decodeURIComponent(proxy.username || '')}:${decodeURIComponent( + proxy.password || '', + )}`, + ).toString('base64') + connectOptions.headers['Proxy-Authorization'] = `Basic ${auth}` + } + + const transport = isHttpsProxy ? httpsModule : httpModule + const req = transport.request(connectOptions) + + req.on('connect', (res, socket) => { + if (res.statusCode === 200) { + resolve(socket) + return + } + + socket.destroy() + reject(new Error(`Proxy CONNECT failed with status ${res.statusCode}`)) + }) + + req.on('error', (error) => { + reject(new Error(`Proxy connection failed: ${error.message}`)) + }) + + req.setTimeout(requestTimeout, () => { + req.destroy() + reject(new Error('Proxy connection timeout.')) + }) + + req.end() + }) + } + + async function buildRequestOptions(url, options = {}) { + const parsedUrl = new URL(url) + const reqOptions = { + hostname: parsedUrl.hostname, + port: parsedUrl.port || 443, + path: parsedUrl.pathname + parsedUrl.search, + headers: { + 'User-Agent': userAgent, + ...options.headers, + }, + } + + const proxyUrl = getProxyUrl() + if (!proxyUrl || shouldBypassProxy(parsedUrl.hostname)) { + return reqOptions + } + + const tunnelSocket = await connectThroughProxy( + proxyUrl, + parsedUrl.hostname, + parsedUrl.port || 443, + ) + + class TunnelAgent extends httpsModule.Agent { + createConnection(_options, callback) { + const secureSocket = tlsModule.connect({ + socket: tunnelSocket, + servername: parsedUrl.hostname, + }) + + if (typeof callback === 'function') { + if (typeof secureSocket.once === 'function') { + let settled = false + const finish = (error) => { + if (settled) return + settled = true + callback(error || null, error ? undefined : secureSocket) + } + + secureSocket.once('secureConnect', () => finish(null)) + secureSocket.once('error', (error) => finish(error)) + } else { + callback(null, secureSocket) + } + } + + return secureSocket + } + } + + reqOptions.agent = new TunnelAgent({ keepAlive: false }) + return reqOptions + } + + async function httpGet(url, options = {}) { + const reqOptions = await buildRequestOptions(url, options) + + return new Promise((resolve, reject) => { + const req = httpsModule.get(reqOptions, (res) => { + if (res.statusCode === 301 || res.statusCode === 302) { + res.resume() + httpGet(new URL(res.headers.location, url).href, options) + .then(resolve) + .catch(reject) + return + } + + resolve(res) + }) + + req.on('error', reject) + req.setTimeout(options.timeout || requestTimeout, () => { + req.destroy() + reject(new Error('Request timeout.')) + }) + }) + } + + return { + getProxyUrl, + httpGet, + } +} + +module.exports = { + createReleaseHttpClient, +} diff --git a/freebuff/cli/release/index.js b/freebuff/cli/release/index.js index 56d8539df6..db7fe566a8 100644 --- a/freebuff/cli/release/index.js +++ b/freebuff/cli/release/index.js @@ -6,10 +6,10 @@ const http = require('http') const https = require('https') const os = require('os') const path = require('path') -const tls = require('tls') const zlib = require('zlib') const tar = require('tar') +const { createReleaseHttpClient } = require('./http') const packageName = 'freebuff' @@ -66,6 +66,11 @@ function createConfig(packageName) { } const CONFIG = createConfig(packageName) +const { getProxyUrl, httpGet } = createReleaseHttpClient({ + env: process.env, + userAgent: CONFIG.userAgent, + requestTimeout: CONFIG.requestTimeout, +}) function getPostHogConfig() { const apiKey = @@ -130,76 +135,6 @@ function trackUpdateFailed(errorMessage, version, context = {}) { } } -function getProxyUrl() { - return ( - process.env.HTTPS_PROXY || - process.env.https_proxy || - process.env.HTTP_PROXY || - process.env.http_proxy || - null - ) -} - -function shouldBypassProxy(hostname) { - const noProxy = process.env.NO_PROXY || process.env.no_proxy || '' - if (!noProxy) return false - const domains = noProxy.split(',').map((d) => d.trim().toLowerCase().replace(/:\d+$/, '')) - const host = hostname.toLowerCase() - return domains.some((d) => { - if (d === '*') return true - if (d.startsWith('.')) return host.endsWith(d) || host === d.slice(1) - return host === d || host.endsWith('.' + d) - }) -} - -function connectThroughProxy(proxyUrl, targetHost, targetPort) { - return new Promise((resolve, reject) => { - const proxy = new URL(proxyUrl) - const isHttpsProxy = proxy.protocol === 'https:' - const connectOptions = { - hostname: proxy.hostname, - port: proxy.port || (isHttpsProxy ? 443 : 80), - method: 'CONNECT', - path: `${targetHost}:${targetPort}`, - headers: { - Host: `${targetHost}:${targetPort}`, - }, - } - - if (proxy.username || proxy.password) { - const auth = Buffer.from( - `${decodeURIComponent(proxy.username || '')}:${decodeURIComponent(proxy.password || '')}`, - ).toString('base64') - connectOptions.headers['Proxy-Authorization'] = `Basic ${auth}` - } - - const transport = isHttpsProxy ? https : http - const req = transport.request(connectOptions) - - req.on('connect', (res, socket) => { - if (res.statusCode === 200) { - resolve(socket) - } else { - socket.destroy() - reject( - new Error(`Proxy CONNECT failed with status ${res.statusCode}`), - ) - } - }) - - req.on('error', (err) => { - reject(new Error(`Proxy connection failed: ${err.message}`)) - }) - - req.setTimeout(CONFIG.requestTimeout, () => { - req.destroy() - reject(new Error('Proxy connection timeout.')) - }) - - req.end() - }) -} - const PLATFORM_TARGETS = { 'linux-x64': `${packageName}-linux-x64.tar.gz`, 'linux-arm64': `${packageName}-linux-arm64.tar.gz`, @@ -224,54 +159,6 @@ const term = { }, } -async function httpGet(url, options = {}) { - const parsedUrl = new URL(url) - const proxyUrl = getProxyUrl() - - const reqOptions = { - hostname: parsedUrl.hostname, - path: parsedUrl.pathname + parsedUrl.search, - headers: { - 'User-Agent': CONFIG.userAgent, - ...options.headers, - }, - } - - if (proxyUrl && !shouldBypassProxy(parsedUrl.hostname)) { - const tunnelSocket = await connectThroughProxy( - proxyUrl, - parsedUrl.hostname, - parsedUrl.port || 443, - ) - reqOptions.agent = false - reqOptions.createConnection = () => - tls.connect({ - socket: tunnelSocket, - servername: parsedUrl.hostname, - }) - } - - return new Promise((resolve, reject) => { - const req = https.get(reqOptions, (res) => { - if (res.statusCode === 302 || res.statusCode === 301) { - res.resume() - return httpGet(new URL(res.headers.location, url).href, options) - .then(resolve) - .catch(reject) - } - resolve(res) - }) - - req.on('error', reject) - - const timeout = options.timeout || CONFIG.requestTimeout - req.setTimeout(timeout, () => { - req.destroy() - reject(new Error('Request timeout.')) - }) - }) -} - async function getLatestVersion() { try { const res = await httpGet( diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 50a6b6b395..3ca67ed820 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -12,6 +12,7 @@ }, "files": [ "index.js", + "http.js", "postinstall.js", "README.md" ], From e411821258ebb4a0d7731fd9175fe2a1d80bdede Mon Sep 17 00:00:00 2001 From: "aether-agent[bot]" <258877100+aether-agent[bot]@users.noreply.github.com> Date: Sat, 18 Apr 2026 00:26:48 -0700 Subject: [PATCH 10/31] Remove evalbuff and expensivebuff (#493) Co-authored-by: CodebuffAI <189203002+CodebuffAI@users.noreply.github.com> From 45fe31291de9b9aa0c06984f179465c002c915c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=AE=B6=E5=90=8D?= Date: Sun, 19 Apr 2026 04:56:44 +0800 Subject: [PATCH 11/31] fix: correct code-map line counting (#508) --- packages/code-map/__tests__/parse.test.ts | 2 +- packages/code-map/src/parse.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/code-map/__tests__/parse.test.ts b/packages/code-map/__tests__/parse.test.ts index 57dd11251d..a15d881c05 100644 --- a/packages/code-map/__tests__/parse.test.ts +++ b/packages/code-map/__tests__/parse.test.ts @@ -132,7 +132,7 @@ describe('parse module', () => { () => multilineCode, ) - expect(result.numLines).toBe(2) // Due to operator precedence: .match(/\n/g)?.length ?? 0 + 1 becomes (2 ?? 1) = 2 + expect(result.numLines).toBe(3) }) it('should deduplicate identifiers and calls', () => { diff --git a/packages/code-map/src/parse.ts b/packages/code-map/src/parse.ts index 2ab2a0fc05..09c1866a2f 100644 --- a/packages/code-map/src/parse.ts +++ b/packages/code-map/src/parse.ts @@ -169,7 +169,7 @@ export function parseTokens( calls: [] as string[], } } - const numLines = sourceCode.match(/\n/g)?.length ?? 0 + 1 + const numLines = (sourceCode.match(/\n/g)?.length ?? 0) + 1 if (!parser || !query) { throw new Error('Parser or query not found') } From 5c518108db27c9bb652142f0060a43e610980d76 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 00:48:27 -0700 Subject: [PATCH 12/31] Revert restrictions on using paid codebuff --- .../completions/__tests__/completions.test.ts | 46 ++++------------ web/src/app/api/v1/chat/completions/_post.ts | 54 ++----------------- 2 files changed, 14 insertions(+), 86 deletions(-) diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 4baee74992..0577cdcc99 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -18,32 +18,25 @@ import type { BlockGrantResult } from '@codebuff/billing/subscription' import type { GetUserPreferencesFn } from '../_post' describe('/api/v1/chat/completions POST endpoint', () => { - // Old enough to clear the account-age gate in _post.ts - const AGED_ACCOUNT_CREATED_AT = new Date('2024-01-01T00:00:00Z') - const mockUserData: Record< string, - { id: string; banned: boolean; created_at: Date } + { id: string; banned: boolean } > = { 'test-api-key-123': { id: 'user-123', banned: false, - created_at: AGED_ACCOUNT_CREATED_AT, }, 'test-api-key-no-credits': { id: 'user-no-credits', banned: false, - created_at: AGED_ACCOUNT_CREATED_AT, }, 'test-api-key-blocked': { id: 'banned-user-id', banned: true, - created_at: AGED_ACCOUNT_CREATED_AT, }, 'test-api-key-new-free': { id: 'user-new-free', banned: false, - created_at: new Date(), }, } @@ -57,7 +50,6 @@ describe('/api/v1/chat/completions POST endpoint', () => { return { id: userData.id, banned: userData.banned, - created_at: userData.created_at, } as Awaited> } @@ -82,15 +74,15 @@ describe('/api/v1/chat/completions POST endpoint', () => { ).toISOString() mockLogger = { - error: mock(() => {}), - warn: mock(() => {}), - info: mock(() => {}), - debug: mock(() => {}), + error: mock(() => { }), + warn: mock(() => { }), + info: mock(() => { }), + debug: mock(() => { }), } mockLoggerWithContext = mock(() => mockLogger) - mockTrackEvent = mock(() => {}) + mockTrackEvent = mock(() => { }) mockGetUserUsageData = mock(async ({ userId }: { userId: string }) => { if (userId === 'user-no-credits') { @@ -101,22 +93,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { totalDebt: 0, netBalance: 0, breakdown: {}, - // Has purchased credits historically (principals > 0) but 0 remaining - // so the paid-plan gate passes and the credit check is what enforces 402. - principals: { purchase: 100 }, - }, - nextQuotaReset, - } - } - if (userId === 'user-new-free') { - return { - usageThisCycle: 0, - balance: { - totalRemaining: 100, - totalDebt: 0, - netBalance: 100, - breakdown: {} as Record, - principals: {} as Record, + principals: {}, }, nextQuotaReset, } @@ -128,7 +105,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { totalDebt: 0, netBalance: 100, breakdown: {}, - principals: { purchase: 100 }, + principals: {}, }, nextQuotaReset, } @@ -474,7 +451,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(body.message).not.toContain(nextQuotaReset) }) - it('returns 403 for a free-tier user with no paid relationship', async () => { + it('lets a new account with no paid relationship through for non-free mode', async () => { const req = new NextRequest( 'http://localhost:3000/api/v1/chat/completions', { @@ -504,11 +481,10 @@ describe('/api/v1/chat/completions POST endpoint', () => { checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) - expect(response.status).toBe(403) - const body = await response.json() - expect(body.error).toBe('requires_paid_plan') + expect(response.status).toBe(200) }) + it('lets a BYOK free-tier new account through the paid-plan gate', async () => { const req = new NextRequest( 'http://localhost:3000/api/v1/chat/completions', diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 4dfc69e133..21b0373f02 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -78,14 +78,6 @@ const FREE_MODE_ALLOWED_COUNTRIES = new Set([ 'NO', 'SE', 'NL', 'DK', 'DE', 'FI', 'BE', 'LU', 'CH', 'IE', 'IS', ]) -const MIN_ACCOUNT_AGE_DAYS = 3 -const MIN_ACCOUNT_AGE_FOR_PAID_MS = MIN_ACCOUNT_AGE_DAYS * 24 * 60 * 60 * 1000 - -// Emails allowed to bypass the paid+aged-account gate so integration tests -// (e.g. the SDK prompt-caching test) can run against a real server without -// needing to seed a purchase on every fresh test account. -const PAID_GATE_BYPASS_EMAILS = new Set(['team@codebuff.com']) - function extractClientIp(req: NextRequest): string | undefined { const forwardedFor = req.headers.get('x-forwarded-for') if (forwardedFor) { @@ -224,7 +216,7 @@ export async function postChatCompletions(params: { // Get user info const userInfo = await getUserInfoFromApiKey({ apiKey, - fields: ['id', 'email', 'discord_id', 'stripe_customer_id', 'banned', 'created_at'], + fields: ['id', 'email', 'discord_id', 'stripe_customer_id', 'banned'], logger, }) if (!userInfo) { @@ -520,50 +512,10 @@ export async function postChatCompletions(params: { // Fetch user credit data (includes subscription credits when block grant was ensured) const { - balance: { totalRemaining, principals }, + balance: { totalRemaining }, nextQuotaReset, } = await getUserUsageData({ userId, logger, includeSubscriptionCredits }) - // Gate non-free-mode requests behind (a) an established paid relationship - // AND (b) a non-new account. An ongoing abuse campaign uses freshly-signed-up - // self-referral accounts to burn credits via the stream-error billing gap in - // openrouter.ts; restricting to aged + paid accounts cuts off that vector. - // BYOK users bypass — they pay OpenRouter directly, so there's nothing to burn. - const openrouterApiKeyHeader = req.headers.get(BYOK_OPENROUTER_HEADER) - const hasPaidRelationship = - (principals.purchase ?? 0) > 0 || (principals.subscription ?? 0) > 0 - const accountAgeMs = userInfo.created_at - ? Date.now() - new Date(userInfo.created_at).getTime() - : 0 - const accountIsTooNew = accountAgeMs < MIN_ACCOUNT_AGE_FOR_PAID_MS - const isBypassedEmail = - !!userInfo.email && PAID_GATE_BYPASS_EMAILS.has(userInfo.email.toLowerCase()) - if ( - !isFreeModeRequest && - !openrouterApiKeyHeader && - !isBypassedEmail && - (!hasPaidRelationship || accountIsTooNew) - ) { - trackEvent({ - event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR, - userId, - properties: { - error: 'blocked_for_free_tier', - model: typedBody.model, - hasPaidRelationship, - accountAgeMs, - }, - logger, - }) - return NextResponse.json( - { - error: 'requires_paid_plan', - message: `Non-free mode requires a paid subscription or purchased credits on an account at least ${MIN_ACCOUNT_AGE_DAYS} days old. Visit ${env.NEXT_PUBLIC_CODEBUFF_APP_URL}/usage to upgrade, or pass an OpenRouter API key to bring your own credits.`, - }, - { status: 403 }, - ) - } - // Credit check if (totalRemaining <= 0 && !isFreeModeRequest) { trackEvent({ @@ -584,7 +536,7 @@ export async function postChatCompletions(params: { ) } - const openrouterApiKey = openrouterApiKeyHeader + const openrouterApiKey = req.headers.get(BYOK_OPENROUTER_HEADER) // Handle streaming vs non-streaming try { From 282194ae84af1df1207e8a8b79b936ae5ee0de4a Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 15:52:01 -0700 Subject: [PATCH 13/31] Fixes --- .../app/api/v1/freebuff/session/_handlers.ts | 78 +++++++++++++++---- web/src/server/fireworks-monitor/monitor.ts | 55 ++++++++++++- web/src/server/free-session/store.ts | 11 ++- 3 files changed, 121 insertions(+), 23 deletions(-) diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts index a06ec19bc4..164f51f663 100644 --- a/web/src/app/api/v1/freebuff/session/_handlers.ts +++ b/web/src/app/api/v1/freebuff/session/_handlers.ts @@ -49,6 +49,38 @@ async function resolveUser(req: NextRequest, deps: FreebuffSessionDeps): Promise return { userId: String(userInfo.id) } } +function serverError( + deps: FreebuffSessionDeps, + route: string, + userId: string | null, + error: unknown, +): NextResponse { + const err = error instanceof Error ? error : new Error(String(error)) + deps.logger.error( + { + route, + userId, + errorName: err.name, + errorMessage: err.message, + errorCode: (err as any).code, + cause: + (err as any).cause instanceof Error + ? { + name: (err as any).cause.name, + message: (err as any).cause.message, + code: (err as any).cause.code, + } + : (err as any).cause, + stack: err.stack, + }, + '[freebuff/session] handler failed', + ) + return NextResponse.json( + { error: 'internal_error', message: err.message }, + { status: 500 }, + ) +} + /** POST /api/v1/freebuff/session — join queue / take over as this instance. */ export async function postFreebuffSession( req: NextRequest, @@ -57,11 +89,15 @@ export async function postFreebuffSession( const auth = await resolveUser(req, deps) if ('error' in auth) return auth.error - const state = await requestSession({ - userId: auth.userId, - deps: deps.sessionDeps, - }) - return NextResponse.json(state, { status: 200 }) + try { + const state = await requestSession({ + userId: auth.userId, + deps: deps.sessionDeps, + }) + return NextResponse.json(state, { status: 200 }) + } catch (error) { + return serverError(deps, 'POST', auth.userId, error) + } } /** GET /api/v1/freebuff/session — read current state without mutation. */ @@ -72,17 +108,21 @@ export async function getFreebuffSession( const auth = await resolveUser(req, deps) if ('error' in auth) return auth.error - const state = await getSessionState({ - userId: auth.userId, - deps: deps.sessionDeps, - }) - if (!state) { - return NextResponse.json( - { status: 'none', message: 'Call POST to join the waiting room.' }, - { status: 200 }, - ) + try { + const state = await getSessionState({ + userId: auth.userId, + deps: deps.sessionDeps, + }) + if (!state) { + return NextResponse.json( + { status: 'none', message: 'Call POST to join the waiting room.' }, + { status: 200 }, + ) + } + return NextResponse.json(state, { status: 200 }) + } catch (error) { + return serverError(deps, 'GET', auth.userId, error) } - return NextResponse.json(state, { status: 200 }) } /** DELETE /api/v1/freebuff/session — end session / leave queue immediately. */ @@ -93,6 +133,10 @@ export async function deleteFreebuffSession( const auth = await resolveUser(req, deps) if ('error' in auth) return auth.error - await endUserSession({ userId: auth.userId, deps: deps.sessionDeps }) - return NextResponse.json({ status: 'ended' }, { status: 200 }) + try { + await endUserSession({ userId: auth.userId, deps: deps.sessionDeps }) + return NextResponse.json({ status: 'ended' }, { status: 200 }) + } catch (error) { + return serverError(deps, 'DELETE', auth.userId, error) + } } diff --git a/web/src/server/fireworks-monitor/monitor.ts b/web/src/server/fireworks-monitor/monitor.ts index ffc452e999..501e90d3bd 100644 --- a/web/src/server/fireworks-monitor/monitor.ts +++ b/web/src/server/fireworks-monitor/monitor.ts @@ -108,10 +108,46 @@ function jittered(intervalMs: number): number { return Math.max(1_000, Math.round(intervalMs + delta)) } +/** Unwrap nested `.cause` chains (undici's `fetch failed` wraps the real + * error — DNS, ECONNREFUSED, TLS, etc. — under `.cause`). */ +function describeError(error: unknown): { + message: string + name?: string + code?: string + causes: Array<{ name?: string; message: string; code?: string }> + stack?: string +} { + const causes: Array<{ name?: string; message: string; code?: string }> = [] + let cursor: unknown = error instanceof Error ? (error as any).cause : undefined + let guard = 0 + while (cursor && guard < 5) { + if (cursor instanceof Error) { + causes.push({ + name: cursor.name, + message: cursor.message, + code: (cursor as any).code, + }) + cursor = (cursor as any).cause + } else { + causes.push({ message: String(cursor) }) + break + } + guard++ + } + return { + message: error instanceof Error ? error.message : String(error), + name: error instanceof Error ? error.name : undefined, + code: error instanceof Error ? (error as any).code : undefined, + causes, + stack: error instanceof Error ? error.stack : undefined, + } +} + async function pollOnce(): Promise { if (!state) return const controller = new AbortController() const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS) + const url = FIREWORKS_METRICS_URL(state.options.accountId) try { const metrics = await scrapeFireworksMetrics({ apiKey: state.options.apiKey, @@ -123,8 +159,8 @@ async function pollOnce(): Promise { state.lastError = null state.backoffUntil = 0 } catch (error) { - const message = error instanceof Error ? error.message : String(error) - state.lastError = message + const details = describeError(error) + state.lastError = details.message if (error instanceof FireworksScrapeError && error.status === 429) { const backoffMs = error.retryAfterMs ?? DEFAULT_429_BACKOFF_MS state.backoffUntil = Date.now() + backoffMs @@ -133,7 +169,20 @@ async function pollOnce(): Promise { '[FireworksMonitor] Rate limited, backing off', ) } else { - logger.warn({ error: message }, '[FireworksMonitor] Scrape failed') + logger.warn( + { + error: details.message, + errorName: details.name, + errorCode: details.code, + causes: details.causes, + aborted: controller.signal.aborted, + url, + accountId: state.options.accountId, + usingCustomFetch: Boolean(state.options.fetch), + stack: details.stack, + }, + '[FireworksMonitor] Scrape failed', + ) } } finally { clearTimeout(timeout) diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts index fdc6b14b1e..332d145c8e 100644 --- a/web/src/server/free-session/store.ts +++ b/web/src/server/free-session/store.ts @@ -52,6 +52,11 @@ export async function joinOrTakeOver(params: { const { userId, now } = params const nextInstanceId = newInstanceId() + // postgres-js does NOT coerce raw JS Date values when they're interpolated + // inside a `sql\`...\`` fragment (the column-type hint that Drizzle's + // values() path relies on is absent there). Pre-serialize to an ISO string + // and cast to timestamptz so the driver binds it as text. + const nowIso = sql`${now.toISOString()}::timestamptz` // Single UPSERT that encodes every case in one round-trip, race-safe // against concurrent POSTs for the same user (the PK would otherwise turn // two parallel INSERTs into a 500). Inside ON CONFLICT DO UPDATE, bare @@ -63,7 +68,7 @@ export async function joinOrTakeOver(params: { // queued → rotate instance_id, preserve queued_at // active & expired → re-queue at back: status=queued, // queued_at=now, admitted_at/expires_at=null - const activeUnexpired = sql`${schema.freeSession.status} = 'active' AND ${schema.freeSession.expires_at} > ${now}` + const activeUnexpired = sql`${schema.freeSession.status} = 'active' AND ${schema.freeSession.expires_at} > ${nowIso}` const [row] = await db .insert(schema.freeSession) @@ -84,7 +89,7 @@ export async function joinOrTakeOver(params: { queued_at: sql`CASE WHEN ${schema.freeSession.status} = 'queued' THEN ${schema.freeSession.queued_at} WHEN ${activeUnexpired} THEN ${schema.freeSession.queued_at} - ELSE ${now} + ELSE ${nowIso} END`, admitted_at: sql`CASE WHEN ${activeUnexpired} THEN ${schema.freeSession.admitted_at} ELSE NULL END`, expires_at: sql`CASE WHEN ${activeUnexpired} THEN ${schema.freeSession.expires_at} ELSE NULL END`, @@ -152,7 +157,7 @@ export async function queuePositionFor(params: { .where( and( eq(schema.freeSession.status, 'queued'), - sql`(${schema.freeSession.queued_at}, ${schema.freeSession.user_id}) <= (${params.queuedAt}, ${params.userId})`, + sql`(${schema.freeSession.queued_at}, ${schema.freeSession.user_id}) <= (${params.queuedAt.toISOString()}::timestamptz, ${params.userId})`, ), ) return Number(rows[0]?.n ?? 0) From 8ca704aad568229c2dc881aa6c2aa8804f97a695 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 16:06:39 -0700 Subject: [PATCH 14/31] Admit one user per 30s --- web/src/server/free-session/config.ts | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index 1fc5dc1424..5bf26b86ce 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -7,14 +7,15 @@ import { env } from '@codebuff/internal/env' */ export const FREEBUFF_ADMISSION_LOCK_ID = 573924815 -/** Admission tick cadence. Fast enough to drain the queue promptly, slow - * enough to avoid DB churn. */ -export const ADMISSION_TICK_MS = 5_000 +/** Admission tick cadence. Paired with MAX_ADMITS_PER_TICK=1 this staggers + * admissions so newly-admitted CLIs don't all POST to the + * Fireworks deployment simultaneously. */ +export const ADMISSION_TICK_MS = 15_000 -/** Max users admitted in a single tick. Protects against thundering-herd - * admissions when capacity frees up all at once (e.g. after a Fireworks - * incident clears). */ -export const MAX_ADMITS_PER_TICK = 20 +/** Max users admitted in a single tick. Staggering matters more than + * throughput here: keeps load on Fireworks smooth even when a + * large block of sessions expires at once. */ +export const MAX_ADMITS_PER_TICK = 1 export function isWaitingRoomEnabled(): boolean { return env.FREEBUFF_WAITING_ROOM_ENABLED From 4a0efb8470c723e5cb0d441ab064616cc8b94204 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 16:28:23 -0700 Subject: [PATCH 15/31] Detect cold Fireworks deployments; tighten TTFT/queue thresholds Replicas=0 or no replicas metric at all (the deployment has been scaled to zero or dropped from the scrape) now flips that deployment's health to unhealthy unconditionally, so admission fails closed instead of funneling users to a backend that cannot serve traffic. Also drop generationQueueMs degraded 5000 -> 400 and ttftMs degraded 8000 -> 2000, and comment out the kimi deployment since only glm-5.1 is in production. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/check-fireworks-health.ts | 1 + web/src/llm-api/fireworks-config.ts | 2 +- .../__tests__/compute-health.test.ts | 40 +++++++++++++++++++ .../__tests__/monitor.test.ts | 1 + .../fireworks-monitor/compute-health.ts | 28 +++++++++++-- web/src/server/fireworks-monitor/types.ts | 3 ++ 6 files changed, 71 insertions(+), 4 deletions(-) diff --git a/scripts/check-fireworks-health.ts b/scripts/check-fireworks-health.ts index f534653c81..6d51ab9d46 100644 --- a/scripts/check-fireworks-health.ts +++ b/scripts/check-fireworks-health.ts @@ -110,6 +110,7 @@ async function main() { console.log(`── ${color}${health.status.toUpperCase().padEnd(9)}${RESET} ${model}`) console.log(` deployment: ${deployment}`) console.log(` base model: ${health.baseModel ?? 'n/a'}`) + console.log(` replicas: ${health.metrics.replicas ?? 'n/a'}`) console.log(` request rate: ${health.metrics.requestRate.toFixed(3)} req/s`) console.log(` error rate: ${health.metrics.errorRate.toFixed(3)} err/s (${formatPct(health.metrics.errorFraction)})`) console.log(` concurrent requests: ${health.metrics.concurrentRequests.toFixed(2)}`) diff --git a/web/src/llm-api/fireworks-config.ts b/web/src/llm-api/fireworks-config.ts index c19f7dc5bc..f79815fb5c 100644 --- a/web/src/llm-api/fireworks-config.ts +++ b/web/src/llm-api/fireworks-config.ts @@ -10,6 +10,6 @@ export const FIREWORKS_ACCOUNT_ID = 'james-65d217' export const FIREWORKS_DEPLOYMENT_MAP: Record = { // 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', - 'moonshotai/kimi-k2.5': 'accounts/james-65d217/deployments/mx8l5rq2', + // 'moonshotai/kimi-k2.5': 'accounts/james-65d217/deployments/mx8l5rq2', 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea', } diff --git a/web/src/server/fireworks-monitor/__tests__/compute-health.test.ts b/web/src/server/fireworks-monitor/__tests__/compute-health.test.ts index 30fba28a9e..d62dab938e 100644 --- a/web/src/server/fireworks-monitor/__tests__/compute-health.test.ts +++ b/web/src/server/fireworks-monitor/__tests__/compute-health.test.ts @@ -18,9 +18,18 @@ function fixture(params: { kvSlots?: number queueBuckets?: Array<{ le: string; count: number }> ttftBuckets?: Array<{ le: string; count: number }> + /** deployment_replicas gauge. Defaults to 1 so existing tests stay healthy. + * Set to 0 or null to simulate a cold/deleted deployment. */ + replicas?: number | null }): string { const lines: string[] = [] const labels = `base_model="m",deployment="${DEPLOYMENT}",deployment_account="test-acc",deployment_id="d1"` + const replicas = params.replicas === undefined ? 1 : params.replicas + if (replicas !== null) { + lines.push( + `deployment_replicas{deployment_account="test-acc",deployment_id="d1"} ${replicas}`, + ) + } if (params.requestRate !== undefined) { lines.push(`request_counter_total:sum_by_deployment{${labels}} ${params.requestRate}`) } @@ -182,9 +191,38 @@ describe('computeDeploymentHealth', () => { expect(health.reasons.some((r) => r.includes('error rate'))).toBe(true) }) + test('flags deployment with zero replicas as unhealthy', () => { + const metrics = parsePrometheusText( + fixture({ requestRate: 0, errorRate: 0, kvBlocks: 0, replicas: 0 }), + ) + const health = computeDeploymentHealth({ + deployment: DEPLOYMENT, + metrics, + thresholds: DEFAULT_HEALTH_THRESHOLDS, + }) + expect(health.status).toBe('unhealthy') + expect(health.metrics.replicas).toBe(0) + expect(health.reasons.some((r) => r.includes('replicas'))).toBe(true) + }) + + test('flags deployment with no replicas metric as unhealthy (cold / deleted)', () => { + const metrics = parsePrometheusText( + fixture({ requestRate: 0, errorRate: 0, kvBlocks: 0, replicas: null }), + ) + const health = computeDeploymentHealth({ + deployment: DEPLOYMENT, + metrics, + thresholds: DEFAULT_HEALTH_THRESHOLDS, + }) + expect(health.status).toBe('unhealthy') + expect(health.metrics.replicas).toBeNull() + expect(health.reasons.some((r) => r.includes('cold or deleted'))).toBe(true) + }) + test('sums error counters across multiple HTTP codes', () => { const labels = `base_model="m",deployment="${DEPLOYMENT}",deployment_id="d1"` const text = [ + `deployment_replicas{deployment_account="test-acc",deployment_id="d1"} 1`, `request_counter_total:sum_by_deployment{${labels}} 100`, `requests_error_total:sum_by_deployment{${labels},http_code="500"} 3`, `requests_error_total:sum_by_deployment{${labels},http_code="429"} 5`, @@ -231,9 +269,11 @@ describe('computeSnapshot', () => { test('overall status is the worst across deployments', () => { const dep2 = 'accounts/test-acc/deployments/d2' const text = [ + `deployment_replicas{deployment_id="d1"} 1`, `request_counter_total:sum_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1"} 100`, `requests_error_total:sum_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1",http_code="500"} 0`, `generator_kv_blocks_fraction:avg_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1"} 0.1`, + `deployment_replicas{deployment_id="d2"} 1`, `request_counter_total:sum_by_deployment{deployment="${dep2}",deployment_id="d2"} 100`, `requests_error_total:sum_by_deployment{deployment="${dep2}",deployment_id="d2",http_code="500"} 30`, `generator_kv_blocks_fraction:avg_by_deployment{deployment="${dep2}",deployment_id="d2"} 0.1`, diff --git a/web/src/server/fireworks-monitor/__tests__/monitor.test.ts b/web/src/server/fireworks-monitor/__tests__/monitor.test.ts index 08dbc8ad3a..c437842384 100644 --- a/web/src/server/fireworks-monitor/__tests__/monitor.test.ts +++ b/web/src/server/fireworks-monitor/__tests__/monitor.test.ts @@ -17,6 +17,7 @@ afterEach(() => { const DEPLOYMENT = 'accounts/test-acc/deployments/d1' const HEALTHY_BODY = [ + `deployment_replicas{deployment_id="d1"} 1`, `request_counter_total:sum_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1"} 10`, `requests_error_total:sum_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1",http_code="500"} 0`, `generator_kv_blocks_fraction:avg_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1"} 0.1`, diff --git a/web/src/server/fireworks-monitor/compute-health.ts b/web/src/server/fireworks-monitor/compute-health.ts index 72efa8b3a8..9cc6e94714 100644 --- a/web/src/server/fireworks-monitor/compute-health.ts +++ b/web/src/server/fireworks-monitor/compute-health.ts @@ -49,9 +49,9 @@ export const DEFAULT_HEALTH_THRESHOLDS: HealthThresholds = { errorFractionUnhealthy: 0.1, kvBlocksFractionDegraded: 0.95, kvBlocksFractionUnhealthy: 0.99, - generationQueueMsDegraded: 5_000, + generationQueueMsDegraded: 400, generationQueueMsUnhealthy: 15_000, - ttftMsDegraded: 8_000, + ttftMsDegraded: 2_000, ttftMsUnhealthy: 30_000, } @@ -69,6 +69,15 @@ export function computeDeploymentHealth(params: { }): DeploymentHealth { const { deployment, metrics, thresholds } = params const filter = { deployment } + const deploymentId = parseDeploymentId(deployment) + + // `deployment_replicas` is keyed by deployment_id (not the full deployment + // path). Zero or missing replicas means the deployment is cold / scaled to + // zero / deleted — admission must fail closed in that case. + const replicasSamples = findSamples(metrics, 'deployment_replicas', { + deployment_id: deploymentId, + }) + const replicas = replicasSamples.length > 0 ? sumSamples(replicasSamples) : null const requestRateSamples = findSamples( metrics, @@ -121,7 +130,6 @@ export function computeDeploymentHealth(params: { ...errorRateSamples, ].find((s) => s.labels.base_model) const baseModel = baseModelSample?.labels.base_model ?? null - const deploymentId = baseModelSample?.labels.deployment_id ?? parseDeploymentId(deployment) const reasons: string[] = [] let status: DeploymentHealthStatus = 'healthy' @@ -130,6 +138,18 @@ export function computeDeploymentHealth(params: { if (STATUS_RANK[next] > STATUS_RANK[status]) status = next } + // A deployment with no running replicas cannot serve traffic. Treat as + // unhealthy unconditionally so admission stops funneling users to a cold + // backend. Missing gauge (`replicas === null`) is the strongest signal + // Fireworks has dropped the deployment from its scrape entirely. + if (replicas === null) { + reasons.push('no replicas metric — deployment cold or deleted') + upgrade('unhealthy') + } else if (replicas <= 0) { + reasons.push(`replicas=${replicas}`) + upgrade('unhealthy') + } + if (requestRate >= thresholds.minRequestRateForErrorCheck) { if (errorFraction >= thresholds.errorFractionUnhealthy) { reasons.push(`error rate ${(errorFraction * 100).toFixed(1)}% ≥ ${(thresholds.errorFractionUnhealthy * 100).toFixed(1)}%`) @@ -175,6 +195,7 @@ export function computeDeploymentHealth(params: { status, reasons, metrics: { + replicas, requestRate, errorRate, errorFraction, @@ -223,6 +244,7 @@ export function computeSnapshot(params: { status: 'unknown', reasons: ['no scrape yet'], metrics: { + replicas: null, requestRate: 0, errorRate: 0, errorFraction: 0, diff --git a/web/src/server/fireworks-monitor/types.ts b/web/src/server/fireworks-monitor/types.ts index 51f45ed8a5..cc10a610ea 100644 --- a/web/src/server/fireworks-monitor/types.ts +++ b/web/src/server/fireworks-monitor/types.ts @@ -18,6 +18,9 @@ export interface DeploymentHealth { status: DeploymentHealthStatus reasons: string[] metrics: { + /** null when Fireworks doesn't emit a deployment_replicas gauge for the + * deployment (cold / deleted / not-yet-scraped). 0 means scaled-to-zero. */ + replicas: number | null requestRate: number errorRate: number errorFraction: number From f5f2f607db65ddf84007bffa1435d934af05027b Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 16:28:42 -0700 Subject: [PATCH 16/31] Drop MAX_CONCURRENT_SESSIONS; drip admission is sole concurrency control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FREEBUFF_MAX_CONCURRENT_SESSIONS is gone. Admission now runs purely as a drip (MAX_ADMITS_PER_TICK=1 every 15s) gated by the Fireworks health monitor — utilisation ramps up slowly and pauses the moment metrics degrade, so a static cap is redundant. Renamed SessionDeps' getMaxConcurrentSessions/getSessionLengthMs to getAdmissionTickMs/getMaxAdmitsPerTick (those are what the wait-time estimate actually needs now). estimateWaitMs is rewritten from the session-cycle model to the drip model: waitMs = ceil((position - 1) / maxAdmitsPerTick) * admissionTickMs Dropped the 'full' branch of AdmissionTickResult and the full-capacity admission test — the only reason admission skips now is health. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/freebuff-waiting-room.md | 25 ++++----- packages/internal/src/env-schema.ts | 2 - .../session/__tests__/session.test.ts | 4 +- .../free-session/__tests__/admission.test.ts | 34 +++--------- .../free-session/__tests__/public-api.test.ts | 7 +-- .../__tests__/session-view.test.ts | 53 ++++++++++--------- web/src/server/free-session/admission.ts | 30 +++++------ web/src/server/free-session/config.ts | 4 -- web/src/server/free-session/public-api.ts | 16 +++--- web/src/server/free-session/session-view.ts | 34 ++++++------ 10 files changed, 92 insertions(+), 117 deletions(-) diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 73ebe79b65..47ab38b802 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -4,8 +4,8 @@ The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployment. It has three jobs: -1. **Bound concurrency** — cap the number of simultaneously-active free users so one deployment does not degrade under load. -2. **Gate on upstream health** — only admit new users while the Fireworks deployment is reporting `healthy` (via the separate monitor in `web/src/server/fireworks-monitor/`). +1. **Drip-admit users** — admit at a steady trickle (default 1 per 15s) so load ramps up gradually rather than stampeding the deployment when the queue is long. +2. **Gate on upstream health** — only admit new users while the Fireworks deployment is reporting `healthy` (via the separate monitor in `web/src/server/fireworks-monitor/`). Once metrics degrade, admission halts until they recover — this is the primary concurrency control, not a static cap. 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. Users who cannot be admitted immediately are placed in a FIFO queue and given an estimated wait time. Admitted users get a fixed-length session (default 1h) during which they can make free-mode requests subject to the existing per-user rate limits. @@ -20,7 +20,6 @@ FREEBUFF_WAITING_ROOM_ENABLED=false # Other knobs (only read when enabled) FREEBUFF_SESSION_LENGTH_MS=3600000 # 1 hour -FREEBUFF_MAX_CONCURRENT_SESSIONS=50 ``` Flipping the flag is safe at runtime: existing rows stay in the DB and will be admitted / expired correctly whenever the flag is flipped back on. @@ -127,17 +126,15 @@ Each tick does (in order): 1. **Sweep expired.** `DELETE FROM free_session WHERE status='active' AND expires_at < now()`. Runs regardless of upstream health so zombie sessions are cleaned up even during an outage. 2. **Check upstream health.** `isFireworksAdmissible()` from the monitor. If not `healthy`, skip admission for this tick (queue grows; users see `status: 'queued'` with increasing position). -3. **Measure capacity.** `capacity = min(MAX_CONCURRENT - activeCount, MAX_ADMITS_PER_TICK)`. `MAX_ADMITS_PER_TICK=20` caps thundering-herd admission when a large block of sessions expires simultaneously. -4. **Admit.** `SELECT ... WHERE status='queued' ORDER BY queued_at, user_id LIMIT capacity FOR UPDATE SKIP LOCKED`, then `UPDATE` those rows to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. +3. **Admit.** `SELECT ... WHERE status='queued' ORDER BY queued_at, user_id LIMIT MAX_ADMITS_PER_TICK FOR UPDATE SKIP LOCKED`, then `UPDATE` those rows to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. Staggering the queue at `MAX_ADMITS_PER_TICK=1` / 15s keeps Fireworks from getting hit by a thundering herd of newly-admitted CLIs; once metrics show the deployment is saturated, step 2 halts further admissions. ### Tunables | Constant | Location | Default | Purpose | |---|---|---|---| -| `ADMISSION_TICK_MS` | `config.ts` | 5000 | How often the ticker fires | -| `MAX_ADMITS_PER_TICK` | `config.ts` | 20 | Upper bound on admits per tick | +| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires | +| `MAX_ADMITS_PER_TICK` | `config.ts` | 1 | Upper bound on admits per tick | | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | -| `FREEBUFF_MAX_CONCURRENT_SESSIONS` | env | 50 | Global active-session cap | ## HTTP API @@ -210,18 +207,18 @@ When the waiting room is disabled, the gate returns `{ ok: true, reason: 'disabl ## Estimated Wait Time -Computed in `session-view.ts` as an **upper bound** that assumes uniform session expiry: +Computed in `session-view.ts` from the drip-admission rate: ``` -waves = floor((position - 1) / maxConcurrent) -waitMs = waves * sessionLengthMs +ticksAhead = ceil((position - 1) / maxAdmitsPerTick) +waitMs = ticksAhead * admissionTickMs ``` -- Position 1..`maxConcurrent` → 0 (next tick will admit them) -- Position `maxConcurrent`+1..`2*maxConcurrent` → one full session length +- Position 1 → 0 (next tick admits you) +- Position `maxAdmitsPerTick` + 1 → one tick - and so on. -Actual wait is usually shorter because users call `DELETE /session` on CLI exit and sessions turn over naturally. We show an upper bound because under-promising on wait time is better UX than surprise delays. +This estimate **ignores health-gated pauses**: during a Fireworks incident admission halts entirely, so the actual wait can be longer. We choose to under-report here because showing "unknown" / "indefinite" is worse UX for the common case where the deployment is healthy. ## CLI Integration (frontend-side contract) diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index 13d934fb57..828a64c93f 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -42,7 +42,6 @@ export const serverEnvSchema = clientEnvSchema.extend({ .default('false') .transform((v) => v === 'true'), FREEBUFF_SESSION_LENGTH_MS: z.coerce.number().int().positive().default(60 * 60 * 1000), - FREEBUFF_MAX_CONCURRENT_SESSIONS: z.coerce.number().int().positive().default(50), }) export const serverEnvVars = serverEnvSchema.keyof().options export type ServerEnvVar = (typeof serverEnvVars)[number] @@ -94,5 +93,4 @@ export const serverProcessEnv: ServerInput = { // Freebuff waiting room FREEBUFF_WAITING_ROOM_ENABLED: process.env.FREEBUFF_WAITING_ROOM_ENABLED, FREEBUFF_SESSION_LENGTH_MS: process.env.FREEBUFF_SESSION_LENGTH_MS, - FREEBUFF_MAX_CONCURRENT_SESSIONS: process.env.FREEBUFF_MAX_CONCURRENT_SESSIONS, } diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index 226a2a0a5e..cbdf4d6cfa 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -28,8 +28,8 @@ function makeSessionDeps(overrides: Partial = {}): SessionDeps & { return { rows, isWaitingRoomEnabled: () => true, - getMaxConcurrentSessions: () => 10, - getSessionLengthMs: () => 60 * 60_000, + getAdmissionTickMs: () => 15_000, + getMaxAdmitsPerTick: () => 1, now: () => now, getSessionRow: async (userId) => rows.get(userId) ?? null, queueDepth: async () => [...rows.values()].filter((r) => r.status === 'queued').length, diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts index 613aeeadd6..e9620b3994 100644 --- a/web/src/server/free-session/__tests__/admission.test.ts +++ b/web/src/server/free-session/__tests__/admission.test.ts @@ -20,7 +20,7 @@ function makeAdmissionDeps(overrides: Partial = {}): AdmissionDep return Array.from({ length: limit }, (_, i) => ({ user_id: `u${i}` })) }, isFireworksAdmissible: () => true, - getMaxConcurrentSessions: () => 10, + getMaxAdmitsPerTick: () => 1, getSessionLengthMs: () => 60 * 60 * 1000, now: () => NOW, ...overrides, @@ -28,45 +28,28 @@ function makeAdmissionDeps(overrides: Partial = {}): AdmissionDep } describe('runAdmissionTick', () => { - test('admits up to (max - active) when healthy', async () => { - const deps = makeAdmissionDeps({ - countActive: async () => 3, - getMaxConcurrentSessions: () => 10, - }) + test('admits maxAdmitsPerTick when healthy', async () => { + const deps = makeAdmissionDeps({ getMaxAdmitsPerTick: () => 2 }) const result = await runAdmissionTick(deps) - expect(result.admitted).toBe(7) + expect(result.admitted).toBe(2) expect(result.skipped).toBeNull() }) - test('caps admits per tick at MAX_ADMITS_PER_TICK', async () => { - const deps = makeAdmissionDeps({ - countActive: async () => 0, - getMaxConcurrentSessions: () => 1000, - }) + test('defaults to 1 admit per tick', async () => { + const deps = makeAdmissionDeps() const result = await runAdmissionTick(deps) - expect(result.admitted).toBe(20) + expect(result.admitted).toBe(1) }) test('skips admission when Fireworks not healthy', async () => { const deps = makeAdmissionDeps({ isFireworksAdmissible: () => false, - countActive: async () => 0, }) const result = await runAdmissionTick(deps) expect(result.admitted).toBe(0) expect(result.skipped).toBe('health') }) - test('skips when at capacity', async () => { - const deps = makeAdmissionDeps({ - countActive: async () => 10, - getMaxConcurrentSessions: () => 10, - }) - const result = await runAdmissionTick(deps) - expect(result.admitted).toBe(0) - expect(result.skipped).toBe('full') - }) - test('sweeps expired sessions even when skipping admission', async () => { let swept = 0 const deps = makeAdmissionDeps({ @@ -85,10 +68,9 @@ describe('runAdmissionTick', () => { const deps = makeAdmissionDeps({ sweepExpired: async () => 2, countActive: async () => 5, - getMaxConcurrentSessions: () => 8, }) const result = await runAdmissionTick(deps) expect(result.expired).toBe(2) - expect(result.admitted).toBe(3) + expect(result.admitted).toBe(1) }) }) diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index e7ba5ee9c0..3193e972c2 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -11,7 +11,8 @@ import type { SessionDeps } from '../public-api' import type { InternalSessionRow } from '../types' const SESSION_LEN = 60 * 60 * 1000 -const MAX_CONC = 10 +const TICK_MS = 15_000 +const ADMITS_PER_TICK = 1 function makeDeps(overrides: Partial = {}): SessionDeps & { rows: Map @@ -35,8 +36,8 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { }, _now: () => currentNow, isWaitingRoomEnabled: () => true, - getMaxConcurrentSessions: () => MAX_CONC, - getSessionLengthMs: () => SESSION_LEN, + getAdmissionTickMs: () => TICK_MS, + getMaxAdmitsPerTick: () => ADMITS_PER_TICK, now: () => currentNow, getSessionRow: async (userId) => rows.get(userId) ?? null, endSession: async (userId) => { diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts index fa5f891ab8..f519b0681c 100644 --- a/web/src/server/free-session/__tests__/session-view.test.ts +++ b/web/src/server/free-session/__tests__/session-view.test.ts @@ -4,8 +4,8 @@ import { estimateWaitMs, toSessionStateResponse } from '../session-view' import type { InternalSessionRow } from '../types' -const SESSION_LEN = 60 * 60 * 1000 -const MAX_CONC = 50 +const TICK_MS = 15_000 +const ADMITS_PER_TICK = 1 function row(overrides: Partial = {}): InternalSessionRow { const now = new Date('2026-04-17T12:00:00Z') @@ -23,35 +23,43 @@ function row(overrides: Partial = {}): InternalSessionRow { } describe('estimateWaitMs', () => { - test('position <= capacity → 0 wait', () => { - expect(estimateWaitMs({ position: 1, maxConcurrent: MAX_CONC, sessionLengthMs: SESSION_LEN })).toBe(0) - expect(estimateWaitMs({ position: MAX_CONC, maxConcurrent: MAX_CONC, sessionLengthMs: SESSION_LEN })).toBe(0) + test('position 1 → 0 wait (next tick picks you up)', () => { + expect(estimateWaitMs({ position: 1, admissionTickMs: TICK_MS, maxAdmitsPerTick: ADMITS_PER_TICK })).toBe(0) }) - test('position in second wave → one full session length', () => { - expect(estimateWaitMs({ position: MAX_CONC + 1, maxConcurrent: MAX_CONC, sessionLengthMs: SESSION_LEN })).toBe(SESSION_LEN) + test('position N → (N-1) ticks ahead at 1 admit/tick', () => { + expect(estimateWaitMs({ position: 2, admissionTickMs: TICK_MS, maxAdmitsPerTick: 1 })).toBe(TICK_MS) + expect(estimateWaitMs({ position: 10, admissionTickMs: TICK_MS, maxAdmitsPerTick: 1 })).toBe(9 * TICK_MS) }) - test('position in third wave → two full session lengths', () => { - expect(estimateWaitMs({ position: 2 * MAX_CONC + 1, maxConcurrent: MAX_CONC, sessionLengthMs: SESSION_LEN })).toBe(2 * SESSION_LEN) + test('batched admission divides wait', () => { + // 5 admits/tick: positions 2-6 all sit one tick ahead. + expect(estimateWaitMs({ position: 2, admissionTickMs: TICK_MS, maxAdmitsPerTick: 5 })).toBe(TICK_MS) + expect(estimateWaitMs({ position: 6, admissionTickMs: TICK_MS, maxAdmitsPerTick: 5 })).toBe(TICK_MS) + // Position 7 enters the second tick. + expect(estimateWaitMs({ position: 7, admissionTickMs: TICK_MS, maxAdmitsPerTick: 5 })).toBe(2 * TICK_MS) }) test('degenerate inputs return 0', () => { - expect(estimateWaitMs({ position: 0, maxConcurrent: 10, sessionLengthMs: 1000 })).toBe(0) - expect(estimateWaitMs({ position: 5, maxConcurrent: 0, sessionLengthMs: 1000 })).toBe(0) + expect(estimateWaitMs({ position: 0, admissionTickMs: TICK_MS, maxAdmitsPerTick: 1 })).toBe(0) + expect(estimateWaitMs({ position: 5, admissionTickMs: 0, maxAdmitsPerTick: 1 })).toBe(0) + expect(estimateWaitMs({ position: 5, admissionTickMs: TICK_MS, maxAdmitsPerTick: 0 })).toBe(0) }) }) describe('toSessionStateResponse', () => { const now = new Date('2026-04-17T12:00:00Z') + const baseArgs = { + admissionTickMs: TICK_MS, + maxAdmitsPerTick: ADMITS_PER_TICK, + } test('returns null when row is null', () => { const view = toSessionStateResponse({ row: null, position: 0, queueDepth: 0, - maxConcurrent: MAX_CONC, - sessionLengthMs: SESSION_LEN, + ...baseArgs, now, }) expect(view).toBeNull() @@ -60,18 +68,17 @@ describe('toSessionStateResponse', () => { test('queued row maps to queued response with position + wait estimate', () => { const view = toSessionStateResponse({ row: row({ status: 'queued' }), - position: 51, - queueDepth: 100, - maxConcurrent: MAX_CONC, - sessionLengthMs: SESSION_LEN, + position: 3, + queueDepth: 10, + ...baseArgs, now, }) expect(view).toEqual({ status: 'queued', instanceId: 'inst-1', - position: 51, - queueDepth: 100, - estimatedWaitMs: SESSION_LEN, + position: 3, + queueDepth: 10, + estimatedWaitMs: 2 * TICK_MS, queuedAt: now.toISOString(), }) }) @@ -83,8 +90,7 @@ describe('toSessionStateResponse', () => { row: row({ status: 'active', admitted_at: admittedAt, expires_at: expiresAt }), position: 0, queueDepth: 0, - maxConcurrent: MAX_CONC, - sessionLengthMs: SESSION_LEN, + ...baseArgs, now, }) expect(view).toEqual({ @@ -101,8 +107,7 @@ describe('toSessionStateResponse', () => { row: row({ status: 'active', admitted_at: now, expires_at: new Date(now.getTime() - 1) }), position: 0, queueDepth: 0, - maxConcurrent: MAX_CONC, - sessionLengthMs: SESSION_LEN, + ...baseArgs, now, }) expect(view).toBeNull() diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts index 0bc9a2dfd3..44de539ba2 100644 --- a/web/src/server/free-session/admission.ts +++ b/web/src/server/free-session/admission.ts @@ -1,7 +1,6 @@ import { ADMISSION_TICK_MS, MAX_ADMITS_PER_TICK, - getMaxConcurrentSessions, getSessionLengthMs, isWaitingRoomEnabled, } from './config' @@ -20,8 +19,8 @@ let state: AdmissionState | null = null /** Emit a `[FreeSessionAdmission] snapshot` log every N ticks even when * nothing changed, so dashboards / alerts have a reliable heartbeat of - * queue depth and active count. At ADMISSION_TICK_MS=5s, 12 ticks = 1 min. */ -const SNAPSHOT_EVERY_N_TICKS = 12 + * queue depth and active count. At ADMISSION_TICK_MS=15s, 10 ticks = 2.5 min. */ +const SNAPSHOT_EVERY_N_TICKS = 10 export interface AdmissionDeps { sweepExpired: (now: Date) => Promise @@ -33,7 +32,7 @@ export interface AdmissionDeps { now: Date }) => Promise<{ user_id: string }[]> isFireworksAdmissible: () => boolean - getMaxConcurrentSessions: () => number + getMaxAdmitsPerTick: () => number getSessionLengthMs: () => number now?: () => Date } @@ -44,7 +43,7 @@ const defaultDeps: AdmissionDeps = { queueDepth, admitFromQueue, isFireworksAdmissible, - getMaxConcurrentSessions, + getMaxAdmitsPerTick: () => MAX_ADMITS_PER_TICK, getSessionLengthMs, } @@ -53,14 +52,19 @@ export interface AdmissionTickResult { admitted: number active: number queueDepth: number - skipped: 'health' | 'full' | null + skipped: 'health' | null } /** * Run a single admission tick: * 1. Expire sessions past their expires_at. * 2. If Fireworks is not 'healthy', skip admission (waiting queue grows). - * 3. Admit up to (maxConcurrent - activeCount, MAX_ADMITS_PER_TICK) users. + * 3. Admit up to maxAdmitsPerTick queued users. + * + * There is no global concurrency cap — the Fireworks health monitor is the + * primary gate. Admission drips at (maxAdmitsPerTick / ADMISSION_TICK_MS), + * which drives utilization up slowly; once metrics degrade, step 2 halts + * admission until things recover. * * Returns counts for observability. Safe to call concurrently across pods — * the underlying admit query takes an advisory xact lock. @@ -80,15 +84,8 @@ export async function runAdmissionTick( } const active = await deps.countActive(now) - const max = deps.getMaxConcurrentSessions() - const capacity = Math.min(Math.max(0, max - active), MAX_ADMITS_PER_TICK) - if (capacity === 0) { - const depth = await deps.queueDepth() - return { expired, admitted: 0, active, queueDepth: depth, skipped: 'full' } - } - const admitted = await deps.admitFromQueue({ - limit: capacity, + limit: deps.getMaxAdmitsPerTick(), sessionLengthMs: deps.getSessionLengthMs(), now, }) @@ -129,7 +126,6 @@ function runTick() { expired: result.expired, active: result.active, queueDepth: result.queueDepth, - maxConcurrent: getMaxConcurrentSessions(), skipped: result.skipped, }, changed ? '[FreeSessionAdmission] tick' : '[FreeSessionAdmission] snapshot', @@ -158,7 +154,7 @@ export function startFreeSessionAdmission(): boolean { state = { timer: null, inFlight: null, tickCount: 0 } runTick() logger.info( - { tickMs: ADMISSION_TICK_MS, maxConcurrent: getMaxConcurrentSessions() }, + { tickMs: ADMISSION_TICK_MS, maxAdmitsPerTick: MAX_ADMITS_PER_TICK }, '[FreeSessionAdmission] Started', ) return true diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index 5bf26b86ce..e41f2a63cb 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -24,7 +24,3 @@ export function isWaitingRoomEnabled(): boolean { export function getSessionLengthMs(): number { return env.FREEBUFF_SESSION_LENGTH_MS } - -export function getMaxConcurrentSessions(): number { - return env.FREEBUFF_MAX_CONCURRENT_SESSIONS -} diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index b0e19b7ca9..7e345dd264 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -1,6 +1,6 @@ import { - getMaxConcurrentSessions, - getSessionLengthMs, + ADMISSION_TICK_MS, + MAX_ADMITS_PER_TICK, isWaitingRoomEnabled, } from './config' import { @@ -21,8 +21,8 @@ export interface SessionDeps { queueDepth: () => Promise queuePositionFor: (params: { userId: string; queuedAt: Date }) => Promise isWaitingRoomEnabled: () => boolean - getMaxConcurrentSessions: () => number - getSessionLengthMs: () => number + getAdmissionTickMs: () => number + getMaxAdmitsPerTick: () => number now?: () => Date } @@ -33,8 +33,8 @@ const defaultDeps: SessionDeps = { queueDepth, queuePositionFor, isWaitingRoomEnabled, - getMaxConcurrentSessions, - getSessionLengthMs, + getAdmissionTickMs: () => ADMISSION_TICK_MS, + getMaxAdmitsPerTick: () => MAX_ADMITS_PER_TICK, } const nowOf = (deps: SessionDeps): Date => (deps.now ?? (() => new Date()))() @@ -55,8 +55,8 @@ async function viewForRow( row, position, queueDepth: depth, - maxConcurrent: deps.getMaxConcurrentSessions(), - sessionLengthMs: deps.getSessionLengthMs(), + admissionTickMs: deps.getAdmissionTickMs(), + maxAdmitsPerTick: deps.getMaxAdmitsPerTick(), now: nowOf(deps), }) } diff --git a/web/src/server/free-session/session-view.ts b/web/src/server/free-session/session-view.ts index 6774b6d636..61ad6f0c84 100644 --- a/web/src/server/free-session/session-view.ts +++ b/web/src/server/free-session/session-view.ts @@ -9,11 +9,11 @@ export function toSessionStateResponse(params: { row: InternalSessionRow | null position: number queueDepth: number - maxConcurrent: number - sessionLengthMs: number + admissionTickMs: number + maxAdmitsPerTick: number now: Date }): SessionStateResponse | null { - const { row, position, queueDepth, maxConcurrent, sessionLengthMs, now } = params + const { row, position, queueDepth, admissionTickMs, maxAdmitsPerTick, now } = params if (!row) return null if (row.status === 'active' && row.expires_at && row.expires_at.getTime() > now.getTime()) { @@ -34,8 +34,8 @@ export function toSessionStateResponse(params: { queueDepth, estimatedWaitMs: estimateWaitMs({ position, - maxConcurrent, - sessionLengthMs, + admissionTickMs, + maxAdmitsPerTick, }), queuedAt: row.queued_at.toISOString(), } @@ -46,21 +46,21 @@ export function toSessionStateResponse(params: { } /** - * Upper-bound estimate: assumes full capacity and uniform session expiry. - * Real wait time is usually lower because sessions finish early. + * Wait-time estimate under the drip-admission model: we admit + * `maxAdmitsPerTick` users every `admissionTickMs`, gated by Fireworks + * health. Ignoring health pauses, user at position P waits roughly + * `ceil((P - 1) / maxAdmitsPerTick) * admissionTickMs`. * - * waitMs ≈ floor((position - 1) / maxConcurrent) * sessionLengthMs - * - * Position 1..maxConcurrent → 0ms (next admission tick will pick you up). - * Position maxConcurrent+1..2*maxConcurrent → one full session length. + * Position 1 → 0ms (next tick picks you up). + * Position maxAdmitsPerTick+1 → one tick. */ export function estimateWaitMs(params: { position: number - maxConcurrent: number - sessionLengthMs: number + admissionTickMs: number + maxAdmitsPerTick: number }): number { - const { position, maxConcurrent, sessionLengthMs } = params - if (position <= 0 || maxConcurrent <= 0) return 0 - const waves = Math.floor((position - 1) / maxConcurrent) - return waves * sessionLengthMs + const { position, admissionTickMs, maxAdmitsPerTick } = params + if (position <= 1 || admissionTickMs <= 0 || maxAdmitsPerTick <= 0) return 0 + const ticksAhead = Math.ceil((position - 1) / maxAdmitsPerTick) + return ticksAhead * admissionTickMs } From 0a1bd36a814c81955fb0854b53b30183d44c8145 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 16:28:51 -0700 Subject: [PATCH 17/31] Handle Ctrl+C on freebuff waiting-room / superseded screens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stdin is in raw mode on these screens, so SIGINT never fires — Ctrl+C had no effect and users had to kill the process. Now both screens hook Ctrl+C via OpenTUI's useKeyboard, flush analytics with a 1s cap, and exit. The waiting-room screen additionally sends a best-effort DELETE /api/v1/freebuff/session before exit so the seat frees up immediately instead of waiting on the server-side expiry sweep. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../components/freebuff-superseded-screen.tsx | 26 +++++- cli/src/components/waiting-room-screen.tsx | 83 ++++++++++++------- cli/src/hooks/use-freebuff-session.ts | 21 +++++ 3 files changed, 101 insertions(+), 29 deletions(-) diff --git a/cli/src/components/freebuff-superseded-screen.tsx b/cli/src/components/freebuff-superseded-screen.tsx index bd730b3c66..8d027c8978 100644 --- a/cli/src/components/freebuff-superseded-screen.tsx +++ b/cli/src/components/freebuff-superseded-screen.tsx @@ -1,11 +1,19 @@ import { TextAttributes } from '@opentui/core' -import React from 'react' +import { useKeyboard } from '@opentui/react' +import React, { useCallback } from 'react' import { useLogo } from '../hooks/use-logo' import { useTerminalDimensions } from '../hooks/use-terminal-dimensions' import { useTheme } from '../hooks/use-theme' +import { flushAnalytics } from '../utils/analytics' +import { withTimeout } from '../utils/terminal-color-detection' import { getLogoAccentColor, getLogoBlockColor } from '../utils/theme-system' +import type { KeyEvent } from '@opentui/core' + +/** Cap on analytics flush so a slow network doesn't block process exit. */ +const EXIT_CLEANUP_TIMEOUT_MS = 1000 + /** * Terminal state shown after a 409 session_superseded response. Another CLI on * the same account rotated our instance id and we've stopped polling — the @@ -22,6 +30,22 @@ export const FreebuffSupersededScreen: React.FC = () => { blockColor, }) + // Ctrl+C exits. Stdin is in raw mode, so SIGINT never fires — the key comes + // through as a normal OpenTUI key event. No DELETE needed here: the other + // CLI already rotated our instance id, so our seat (if any) belongs to them. + useKeyboard( + useCallback((key: KeyEvent) => { + if (key.ctrl && key.name === 'c') { + key.preventDefault?.() + withTimeout(flushAnalytics(), EXIT_CLEANUP_TIMEOUT_MS, undefined).finally( + () => { + process.exit(0) + }, + ) + } + }, []), + ) + return ( = ({ // Always enable ads in the waiting room — this is where monetization lives. const { ad, adData, recordImpression } = useGravityAd({ enabled: true }) + // Ctrl+C exits. Stdin is in raw mode, so SIGINT never fires — the key comes + // through as a normal OpenTUI key event. Release the seat before exit so + // the next user in line doesn't have to wait for server-side expiry. + useKeyboard( + useCallback((key: KeyEvent) => { + if (key.ctrl && key.name === 'c') { + key.preventDefault?.() + const cleanup = Promise.allSettled([ + flushAnalytics(), + endFreebuffSessionBestEffort(), + ]) + withTimeout(cleanup, EXIT_CLEANUP_TIMEOUT_MS, undefined).finally(() => { + process.exit(0) + }) + } + }, []), + ) + // Elapsed-in-queue timer. Starts from `queuedAt` so it keeps ticking even if // the user wanders away and comes back. const queuedAtMs = useMemo(() => { @@ -127,40 +153,41 @@ export const WaitingRoomScreen: React.FC = ({ {isQueued && session && ( <> - - + + You're in the waiting room - - Position{' '} - - {session.position} - - of {session.queueDepth} - - - Estimated wait:{' '} - - {formatWait(session.estimatedWaitMs)} - - - - Waiting for {formatElapsed(elapsedMs)} - - - - - - Leave this window open — we'll ding when your session starts. + {session.position === 1 ? ( + + + + ) : ( + + Position + + {session.position} + + / {session.queueDepth} + + )} + {session.position !== 1 && ( + + Wait + + + + + )} + + Elapsed + {formatElapsed(elapsedMs)} diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index 234ef994b9..103e2494e7 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -114,6 +114,27 @@ export function markFreebuffSessionSuperseded(): void { activeRefreshHandle?.markSuperseded() } +/** + * Best-effort DELETE of the caller's session row. Used by exit paths that + * skip React unmount (process.exit on Ctrl+C) so the seat frees up quickly + * instead of waiting for the server-side expiry sweep. Swallows errors + * because we are about to terminate anyway. + */ +export async function endFreebuffSessionBestEffort(): Promise { + if (!IS_FREEBUFF) return + const current = useFreebuffSessionStore.getState().session + if (!current || (current.status !== 'queued' && current.status !== 'active')) { + return + } + const { token } = getAuthTokenDetails() + if (!token) return + try { + await callSession('DELETE', token) + } catch { + // swallow — we're exiting + } +} + /** * Manages the freebuff waiting-room session lifecycle: * - POST on mount to join the queue / rotate instance id From e25cde5709ef0c760c831dc587df08b41c42e162 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 16:38:39 -0700 Subject: [PATCH 18/31] Tighten TTFT/queue degraded thresholds; add scrape-check script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit p50 TTFT degraded 1000 → 1500ms and p50 generation queue degraded 200 → 300ms, so a healthy deployment running at steady-state 1s TTFT does not trip the admission gate. scripts/scrape-check.ts pulls the live Fireworks metrics and prints the same per-deployment health the admission gate sees — useful for tuning thresholds without guessing. Co-Authored-By: Claude Opus 4.7 (1M context) --- web/scripts/scrape-check.ts | 54 +++++++++++++++++++ .../fireworks-monitor/compute-health.ts | 12 ++--- 2 files changed, 60 insertions(+), 6 deletions(-) create mode 100644 web/scripts/scrape-check.ts diff --git a/web/scripts/scrape-check.ts b/web/scripts/scrape-check.ts new file mode 100644 index 0000000000..d4b863135b --- /dev/null +++ b/web/scripts/scrape-check.ts @@ -0,0 +1,54 @@ +/** + * One-off: scrape Fireworks metrics for each configured deployment and print + * the same health summary the admission gate would see. + * + * Usage: + * bun run web/scripts/scrape-check.ts + */ + +import { env } from '@codebuff/internal/env' + +import { computeSnapshot, DEFAULT_HEALTH_THRESHOLDS } from '@/server/fireworks-monitor/compute-health' +import { scrapeFireworksMetrics } from '@/server/fireworks-monitor/monitor' +import { FIREWORKS_ACCOUNT_ID, FIREWORKS_DEPLOYMENT_MAP } from '@/llm-api/fireworks-config' + +async function main() { + const deployments = Object.values(FIREWORKS_DEPLOYMENT_MAP) + const metrics = await scrapeFireworksMetrics({ + apiKey: env.FIREWORKS_API_KEY, + accountId: FIREWORKS_ACCOUNT_ID, + }) + const snapshot = computeSnapshot({ + metrics, + deployments, + thresholds: DEFAULT_HEALTH_THRESHOLDS, + }) + + console.log(`scrapedAt: ${new Date(snapshot.scrapedAt ?? 0).toISOString()}`) + console.log(`overall: ${snapshot.overall}\n`) + + for (const [deployment, health] of Object.entries(snapshot.deployments)) { + console.log(`── ${deployment} (${health.baseModel ?? 'unknown'})`) + console.log(` status: ${health.status}`) + console.log(` replicas: ${health.metrics.replicas}`) + console.log(` req/s: ${health.metrics.requestRate.toFixed(2)}`) + console.log(` errors: ${(health.metrics.errorFraction * 100).toFixed(2)}%`) + console.log(` kvBlocks: ${(health.metrics.kvBlocksFraction * 100).toFixed(1)}%`) + console.log(` kvSlots: ${(health.metrics.kvSlotsFraction * 100).toFixed(1)}%`) + console.log(` concurrent: ${health.metrics.concurrentRequests.toFixed(1)}`) + const q = health.metrics.p50GenerationQueueMs + const t = health.metrics.p50TimeToFirstTokenMs + console.log(` p50 queue: ${q === null ? 'n/a' : `${Math.round(q)}ms`}`) + console.log(` p50 TTFT: ${t === null ? 'n/a' : `${Math.round(t)}ms`}`) + if (health.reasons.length > 0) { + console.log(` reasons:`) + for (const r of health.reasons) console.log(` - ${r}`) + } + console.log() + } +} + +void main().catch((error) => { + console.error(error) + process.exit(1) +}) diff --git a/web/src/server/fireworks-monitor/compute-health.ts b/web/src/server/fireworks-monitor/compute-health.ts index 9cc6e94714..1d737bc0d9 100644 --- a/web/src/server/fireworks-monitor/compute-health.ts +++ b/web/src/server/fireworks-monitor/compute-health.ts @@ -47,12 +47,12 @@ export const DEFAULT_HEALTH_THRESHOLDS: HealthThresholds = { minRequestRateForErrorCheck: 0.1, errorFractionDegraded: 0.02, errorFractionUnhealthy: 0.1, - kvBlocksFractionDegraded: 0.95, - kvBlocksFractionUnhealthy: 0.99, - generationQueueMsDegraded: 400, - generationQueueMsUnhealthy: 15_000, - ttftMsDegraded: 2_000, - ttftMsUnhealthy: 30_000, + kvBlocksFractionDegraded: 0.85, + kvBlocksFractionUnhealthy: 0.97, + generationQueueMsDegraded: 300, + generationQueueMsUnhealthy: 2_000, + ttftMsDegraded: 1_500, + ttftMsUnhealthy: 10_000, } const STATUS_RANK: Record = { From 8ee55ab022f103bf77a4eb3340ac9d923083a872 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 16:57:46 -0700 Subject: [PATCH 19/31] Improve waiting room screen --- cli/src/components/waiting-room-screen.tsx | 92 +++++++++++++++++----- cli/src/hooks/use-gravity-ad.ts | 21 +++-- 2 files changed, 88 insertions(+), 25 deletions(-) diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index f3aa01adb8..73825d0ba0 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -3,6 +3,7 @@ import { useKeyboard, useRenderer } from '@opentui/react' import React, { useCallback, useEffect, useMemo, useState } from 'react' import { AdBanner } from './ad-banner' +import { Button } from './button' import { ChoiceAdBanner } from './choice-ad-banner' import { ShimmerText } from './shimmer-text' import { endFreebuffSessionBestEffort } from '../hooks/use-freebuff-session' @@ -74,26 +75,41 @@ export const WaitingRoomScreen: React.FC = ({ }) // Always enable ads in the waiting room — this is where monetization lives. - const { ad, adData, recordImpression } = useGravityAd({ enabled: true }) + // forceStart bypasses the "wait for first user message" gate inside the hook, + // which would otherwise block ads here since no conversation exists yet. + const { ad, adData, recordImpression } = useGravityAd({ + enabled: true, + forceStart: true, + }) + + // Release the seat + flush analytics before exit. Used by both Ctrl+C and + // the top-right X button so they always do the same cleanup. + const handleExit = useCallback(() => { + const cleanup = Promise.allSettled([ + flushAnalytics(), + endFreebuffSessionBestEffort(), + ]) + withTimeout(cleanup, EXIT_CLEANUP_TIMEOUT_MS, undefined).finally(() => { + process.exit(0) + }) + }, []) // Ctrl+C exits. Stdin is in raw mode, so SIGINT never fires — the key comes - // through as a normal OpenTUI key event. Release the seat before exit so - // the next user in line doesn't have to wait for server-side expiry. + // through as a normal OpenTUI key event. useKeyboard( - useCallback((key: KeyEvent) => { - if (key.ctrl && key.name === 'c') { - key.preventDefault?.() - const cleanup = Promise.allSettled([ - flushAnalytics(), - endFreebuffSessionBestEffort(), - ]) - withTimeout(cleanup, EXIT_CLEANUP_TIMEOUT_MS, undefined).finally(() => { - process.exit(0) - }) - } - }, []), + useCallback( + (key: KeyEvent) => { + if (key.ctrl && key.name === 'c') { + key.preventDefault?.() + handleExit() + } + }, + [handleExit], + ), ) + const [exitHover, setExitHover] = useState(false) + // Elapsed-in-queue timer. Starts from `queuedAt` so it keeps ticking even if // the user wanders away and comes back. const queuedAtMs = useMemo(() => { @@ -118,14 +134,45 @@ export const WaitingRoomScreen: React.FC = ({ backgroundColor: theme.background, }} > + {/* Top-right exit affordance so mouse users have a clear way out even + when they don't know Ctrl+C works. width: '100%' is required for + justifyContent: 'flex-end' to actually push the X to the right. */} + + + + @@ -165,9 +212,16 @@ export const WaitingRoomScreen: React.FC = ({ }} > {session.position === 1 ? ( - - - + <> + + + + + {session.queueDepth === 1 + ? 'just you in line right now' + : `${session.queueDepth} people in line`} + + ) : ( Position diff --git a/cli/src/hooks/use-gravity-ad.ts b/cli/src/hooks/use-gravity-ad.ts index 4ed964c47a..7093d9848b 100644 --- a/cli/src/hooks/use-gravity-ad.ts +++ b/cli/src/hooks/use-gravity-ad.ts @@ -96,8 +96,14 @@ function nextFromChoiceCache(ctrl: GravityController): AdResponse[] | null { * * Activity is tracked via the global activity-tracker module. */ -export const useGravityAd = (options?: { enabled?: boolean }): GravityAdState => { +export const useGravityAd = (options?: { + enabled?: boolean + /** Skip the "wait for first user message" gate. Used by the freebuff + * waiting room, which has no conversation but still needs ads. */ + forceStart?: boolean +}): GravityAdState => { const enabled = options?.enabled ?? true + const forceStart = options?.forceStart ?? false const [ad, setAd] = useState(null) const [adData, setAdData] = useState(null) const [isLoading, setIsLoading] = useState(false) @@ -115,9 +121,12 @@ export const useGravityAd = (options?: { enabled?: boolean }): GravityAdState => const shouldHideAds = !enabled || (isVeryCompactHeight && !isFreeMode) // Use Zustand selector instead of manual subscription - only rerenders when value changes - const hasUserMessaged = useChatStore((s) => + const hasUserMessagedStore = useChatStore((s) => s.messages.some((m) => m.variant === 'user'), ) + // forceStart lets callers (e.g. the waiting room) opt out of the + // "wait for the first user message" gate. + const shouldStart = forceStart || hasUserMessagedStore // Single consolidated controller ref const ctrlRef = useRef({ @@ -358,9 +367,9 @@ export const useGravityAd = (options?: { enabled?: boolean }): GravityAdState => }) }, []) - // Start rotation when user sends first message + // Start rotation when user sends first message (or immediately if forced). useEffect(() => { - if (!hasUserMessaged || !getAdsEnabled() || shouldHideAds) return + if (!shouldStart || !getAdsEnabled() || shouldHideAds) return setIsLoading(true) @@ -390,10 +399,10 @@ export const useGravityAd = (options?: { enabled?: boolean }): GravityAdState => clearInterval(id) ctrlRef.current.intervalId = null } - }, [hasUserMessaged, shouldHideAds]) + }, [shouldStart, shouldHideAds]) // Don't return ad when ads should be hidden - const visible = hasUserMessaged && !shouldHideAds + const visible = shouldStart && !shouldHideAds return { ad: visible ? ad : null, adData: visible ? adData : null, From 845bed1e2c3d5948f880c38465920beafdbb9944 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 17:04:58 -0700 Subject: [PATCH 20/31] Session countdown --- cli/src/chat.tsx | 7 ++- .../components/freebuff-session-countdown.tsx | 60 +++++++++++++++++++ cli/src/components/status-bar.tsx | 4 ++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 cli/src/components/freebuff-session-countdown.tsx diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx index 1f65a51e4e..cb8b41f550 100644 --- a/cli/src/chat.tsx +++ b/cli/src/chat.tsx @@ -21,6 +21,7 @@ import { ReviewScreen } from './components/review-screen' import { MessageWithAgents } from './components/message-with-agents' import { areCreditsRestored } from './components/out-of-credits-banner' import { PendingBashMessage } from './components/pending-bash-message' +import { useHasActiveFreebuffSession } from './components/freebuff-session-countdown' import { StatusBar } from './components/status-bar' import { TopBanner } from './components/top-banner' import { getSlashCommandsWithSkills } from './data/slash-commands' @@ -1337,9 +1338,13 @@ export const Chat = ({ return ` ${segments.join(' ')} ` }, [queuePreviewTitle, pausedQueueText]) + const hasActiveFreebuffSession = useHasActiveFreebuffSession() const shouldShowStatusLine = !feedbackMode && - (hasStatusIndicatorContent || shouldShowQueuePreview || !isAtBottom) + (hasStatusIndicatorContent || + shouldShowQueuePreview || + !isAtBottom || + hasActiveFreebuffSession) // Track mouse movement for ad activity (throttled) const lastMouseActivityRef = useRef(0) diff --git a/cli/src/components/freebuff-session-countdown.tsx b/cli/src/components/freebuff-session-countdown.tsx new file mode 100644 index 0000000000..7b10193ffd --- /dev/null +++ b/cli/src/components/freebuff-session-countdown.tsx @@ -0,0 +1,60 @@ +import React, { useEffect, useState } from 'react' + +import { useTheme } from '../hooks/use-theme' +import { useFreebuffSessionStore } from '../state/freebuff-session-store' +import { IS_FREEBUFF } from '../utils/constants' + +const LOW_THRESHOLD_MS = 5 * 60_000 +const CRITICAL_THRESHOLD_MS = 60_000 + +const formatRemaining = (ms: number): string => { + if (ms <= 0) return 'expiring…' + const totalSeconds = Math.ceil(ms / 1000) + if (totalSeconds < 60) return `${totalSeconds}s left` + const minutes = Math.floor(totalSeconds / 60) + if (minutes < 60) return `${minutes}m left` + const hours = Math.floor(minutes / 60) + const rem = minutes % 60 + return rem === 0 ? `${hours}h left` : `${hours}h ${rem}m left` +} + +/** + * Small countdown shown while a freebuff session is active. Renders the + * time remaining until the server-issued `expiresAt` so users aren't + * surprised when their seat is released. Returns null in non-freebuff + * builds or when no active session exists — safe to always mount. + */ +export const FreebuffSessionCountdown: React.FC = () => { + const theme = useTheme() + const session = useFreebuffSessionStore((s) => s.session) + const expiresAtMs = + session?.status === 'active' ? Date.parse(session.expiresAt) : null + + const [now, setNow] = useState(() => Date.now()) + useEffect(() => { + if (!expiresAtMs) return + const id = setInterval(() => setNow(Date.now()), 1000) + return () => clearInterval(id) + }, [expiresAtMs]) + + if (!IS_FREEBUFF || !expiresAtMs) return null + + const remainingMs = expiresAtMs - now + const color = + remainingMs < CRITICAL_THRESHOLD_MS + ? theme.error + : remainingMs < LOW_THRESHOLD_MS + ? theme.warning + : theme.muted + + return {formatRemaining(remainingMs)} +} + +/** True when the freebuff session countdown will render non-null content. + * Used by the chat surface to keep the status bar visible while a + * session is active, even when there's no streaming/queue activity. */ +export const useHasActiveFreebuffSession = (): boolean => { + return useFreebuffSessionStore( + (s) => IS_FREEBUFF && s.session?.status === 'active', + ) +} diff --git a/cli/src/components/status-bar.tsx b/cli/src/components/status-bar.tsx index 1336ffd41d..e6a2a64e44 100644 --- a/cli/src/components/status-bar.tsx +++ b/cli/src/components/status-bar.tsx @@ -1,5 +1,6 @@ import React, { useEffect, useState } from 'react' +import { FreebuffSessionCountdown } from './freebuff-session-countdown' import { ScrollToBottomButton } from './scroll-to-bottom-button' import { ShimmerText } from './shimmer-text' import { StopButton } from './stop-button' @@ -169,6 +170,9 @@ export const StatusBar = ({ }} > {elapsedTimeContent} + + + {onStop && (statusIndicatorState.kind === 'waiting' || statusIndicatorState.kind === 'streaming') && ( )} From 5ddb1020abec110508178c8a476e9924ff2c9d6f Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 17:19:57 -0700 Subject: [PATCH 21/31] Add freebuff session grace window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep admitting requests for FREEBUFF_SESSION_GRACE_MS (default 30m) after a session's expires_at so in-flight agent runs can drain; hard cutoff past that. Also: replicas=0 → unhealthy, hoist chat/completions gate status map, fix stale threshold comment and a pre-existing free-mode test missing the checkSessionAdmissible override. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/internal/src/env-schema.ts | 2 + .../completions/__tests__/completions.test.ts | 1 + web/src/app/api/v1/chat/completions/_post.ts | 19 +++++---- .../session/__tests__/session.test.ts | 1 + .../fireworks-monitor/compute-health.ts | 6 +-- .../free-session/__tests__/admission.test.ts | 14 +++++++ .../free-session/__tests__/public-api.test.ts | 40 ++++++++++++++++++- .../__tests__/session-view.test.ts | 30 +++++++++++++- web/src/server/free-session/admission.ts | 7 +++- web/src/server/free-session/config.ts | 8 ++++ web/src/server/free-session/public-api.ts | 32 +++++++++++++-- web/src/server/free-session/session-view.ts | 34 +++++++++++----- web/src/server/free-session/store.ts | 11 +++-- web/src/server/free-session/types.ts | 12 ++++++ 14 files changed, 185 insertions(+), 32 deletions(-) diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index 828a64c93f..2f2532b92a 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -42,6 +42,7 @@ export const serverEnvSchema = clientEnvSchema.extend({ .default('false') .transform((v) => v === 'true'), FREEBUFF_SESSION_LENGTH_MS: z.coerce.number().int().positive().default(60 * 60 * 1000), + FREEBUFF_SESSION_GRACE_MS: z.coerce.number().int().nonnegative().default(30 * 60 * 1000), }) export const serverEnvVars = serverEnvSchema.keyof().options export type ServerEnvVar = (typeof serverEnvVars)[number] @@ -93,4 +94,5 @@ export const serverProcessEnv: ServerInput = { // Freebuff waiting room FREEBUFF_WAITING_ROOM_ENABLED: process.env.FREEBUFF_WAITING_ROOM_ENABLED, FREEBUFF_SESSION_LENGTH_MS: process.env.FREEBUFF_SESSION_LENGTH_MS, + FREEBUFF_SESSION_GRACE_MS: process.env.FREEBUFF_SESSION_GRACE_MS, } diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 0577cdcc99..5dac252ca7 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -583,6 +583,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) expect(response.status).toBe(200) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 21b0373f02..06258039e7 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -68,6 +68,8 @@ import { OpenRouterError, } from '@/llm-api/openrouter' import { checkSessionAdmissible } from '@/server/free-session/public-api' + +import type { SessionGateResult } from '@/server/free-session/public-api' import { extractApiKeyFromHeader } from '@/util/auth' import { withDefaultProperties } from '@codebuff/common/analytics' import { checkFreeModeRateLimit } from './free-mode-rate-limiter' @@ -138,6 +140,15 @@ export const formatQuotaResetCountdown = ( export type CheckSessionAdmissibleFn = typeof checkSessionAdmissible +type GateRejectCode = Extract['code'] + +const STATUS_BY_GATE_CODE = { + waiting_room_required: 428, + waiting_room_queued: 429, + session_superseded: 409, + session_expired: 410, +} satisfies Record + export async function postChatCompletions(params: { req: NextRequest getUserInfoFromApiKey: GetUserInfoFromApiKeyFn @@ -410,15 +421,9 @@ export async function postChatCompletions(params: { properties: { error: gate.code }, logger, }) - const statusByCode: Record = { - waiting_room_required: 428, - waiting_room_queued: 429, - session_superseded: 409, - session_expired: 410, - } return NextResponse.json( { error: gate.code, message: gate.message }, - { status: statusByCode[gate.code] ?? 429 }, + { status: STATUS_BY_GATE_CODE[gate.code] }, ) } } diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index cbdf4d6cfa..c41573eec0 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -30,6 +30,7 @@ function makeSessionDeps(overrides: Partial = {}): SessionDeps & { isWaitingRoomEnabled: () => true, getAdmissionTickMs: () => 15_000, getMaxAdmitsPerTick: () => 1, + getSessionGraceMs: () => 30 * 60 * 1000, now: () => now, getSessionRow: async (userId) => rows.get(userId) ?? null, queueDepth: async () => [...rows.values()].filter((r) => r.status === 'queued').length, diff --git a/web/src/server/fireworks-monitor/compute-health.ts b/web/src/server/fireworks-monitor/compute-health.ts index 1d737bc0d9..aa9ae53ba2 100644 --- a/web/src/server/fireworks-monitor/compute-health.ts +++ b/web/src/server/fireworks-monitor/compute-health.ts @@ -38,10 +38,8 @@ export interface HealthThresholds { ttftMsUnhealthy: number } -// Default thresholds are calibrated to the observed freebuff workload on -// glm-5.1 / kimi-k2.5. They are intentionally loose at first so a cold -// deployment does not flap; expect to tighten once you have a week of -// live data. Override per-instance via startFireworksMonitor({ thresholds }). +// Tuned to trip 'degraded' before users feel it on glm-5.1. Override per-instance +// via startFireworksMonitor({ thresholds }). export const DEFAULT_HEALTH_THRESHOLDS: HealthThresholds = { staleSnapshotMs: 3 * 60 * 1000, minRequestRateForErrorCheck: 0.1, diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts index e9620b3994..2e72d2351e 100644 --- a/web/src/server/free-session/__tests__/admission.test.ts +++ b/web/src/server/free-session/__tests__/admission.test.ts @@ -22,6 +22,7 @@ function makeAdmissionDeps(overrides: Partial = {}): AdmissionDep isFireworksAdmissible: () => true, getMaxAdmitsPerTick: () => 1, getSessionLengthMs: () => 60 * 60 * 1000, + getSessionGraceMs: () => 30 * 60 * 1000, now: () => NOW, ...overrides, } @@ -73,4 +74,17 @@ describe('runAdmissionTick', () => { expect(result.expired).toBe(2) expect(result.admitted).toBe(1) }) + + test('forwards grace ms to sweepExpired', async () => { + const received: number[] = [] + const deps = makeAdmissionDeps({ + getSessionGraceMs: () => 12_345, + sweepExpired: async (_now, graceMs) => { + received.push(graceMs) + return 0 + }, + }) + await runAdmissionTick(deps) + expect(received).toEqual([12_345]) + }) }) diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index 3193e972c2..fa66e5d68d 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -13,6 +13,7 @@ import type { InternalSessionRow } from '../types' const SESSION_LEN = 60 * 60 * 1000 const TICK_MS = 15_000 const ADMITS_PER_TICK = 1 +const GRACE_MS = 30 * 60 * 1000 function makeDeps(overrides: Partial = {}): SessionDeps & { rows: Map @@ -38,6 +39,7 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { isWaitingRoomEnabled: () => true, getAdmissionTickMs: () => TICK_MS, getMaxAdmitsPerTick: () => ADMITS_PER_TICK, + getSessionGraceMs: () => GRACE_MS, now: () => currentNow, getSessionRow: async (userId) => rows.get(userId) ?? null, endSession: async (userId) => { @@ -250,12 +252,30 @@ describe('checkSessionAdmissible', () => { expect(result.code).toBe('session_superseded') }) - test('active but expires_at in the past → session_expired', async () => { + test('active inside grace window → ok with reason=draining', async () => { + await requestSession({ userId: 'u1', deps }) + const row = deps.rows.get('u1')! + row.status = 'active' + row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000) + // 1 minute past expiry, well within the 30-minute grace window + row.expires_at = new Date(deps._now().getTime() - 60_000) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: row.active_instance_id, + deps, + }) + expect(result.ok).toBe(true) + if (!result.ok || result.reason !== 'draining') throw new Error('unreachable') + expect(result.gracePeriodRemainingMs).toBe(GRACE_MS - 60_000) + }) + + test('active past the grace window → session_expired', async () => { await requestSession({ userId: 'u1', deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - 2 * SESSION_LEN) - row.expires_at = new Date(deps._now().getTime() - 1) + row.expires_at = new Date(deps._now().getTime() - GRACE_MS - 1) const result = await checkSessionAdmissible({ userId: 'u1', @@ -265,6 +285,22 @@ describe('checkSessionAdmissible', () => { if (result.ok) throw new Error('unreachable') expect(result.code).toBe('session_expired') }) + + test('draining + wrong instance id still rejects with session_superseded', async () => { + await requestSession({ userId: 'u1', deps }) + const row = deps.rows.get('u1')! + row.status = 'active' + row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000) + row.expires_at = new Date(deps._now().getTime() - 60_000) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: 'stale-token', + deps, + }) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('session_superseded') + }) }) describe('endUserSession', () => { diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts index f519b0681c..22686cdb03 100644 --- a/web/src/server/free-session/__tests__/session-view.test.ts +++ b/web/src/server/free-session/__tests__/session-view.test.ts @@ -6,6 +6,7 @@ import type { InternalSessionRow } from '../types' const TICK_MS = 15_000 const ADMITS_PER_TICK = 1 +const GRACE_MS = 30 * 60_000 function row(overrides: Partial = {}): InternalSessionRow { const now = new Date('2026-04-17T12:00:00Z') @@ -52,6 +53,7 @@ describe('toSessionStateResponse', () => { const baseArgs = { admissionTickMs: TICK_MS, maxAdmitsPerTick: ADMITS_PER_TICK, + graceMs: GRACE_MS, } test('returns null when row is null', () => { @@ -102,9 +104,33 @@ describe('toSessionStateResponse', () => { }) }) - test('active but expired row maps to null (caller should re-queue)', () => { + test('active row inside grace window maps to draining response', () => { + const admittedAt = new Date(now.getTime() - 65 * 60_000) + const expiresAt = new Date(now.getTime() - 5 * 60_000) // 5 min past expiry const view = toSessionStateResponse({ - row: row({ status: 'active', admitted_at: now, expires_at: new Date(now.getTime() - 1) }), + row: row({ status: 'active', admitted_at: admittedAt, expires_at: expiresAt }), + position: 0, + queueDepth: 0, + ...baseArgs, + now, + }) + expect(view).toEqual({ + status: 'draining', + instanceId: 'inst-1', + admittedAt: admittedAt.toISOString(), + expiresAt: expiresAt.toISOString(), + gracePeriodEndsAt: new Date(expiresAt.getTime() + GRACE_MS).toISOString(), + gracePeriodRemainingMs: GRACE_MS - 5 * 60_000, + }) + }) + + test('active row past the grace window maps to null (caller should re-queue)', () => { + const view = toSessionStateResponse({ + row: row({ + status: 'active', + admitted_at: now, + expires_at: new Date(now.getTime() - GRACE_MS - 1), + }), position: 0, queueDepth: 0, ...baseArgs, diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts index 44de539ba2..6868903c38 100644 --- a/web/src/server/free-session/admission.ts +++ b/web/src/server/free-session/admission.ts @@ -1,6 +1,7 @@ import { ADMISSION_TICK_MS, MAX_ADMITS_PER_TICK, + getSessionGraceMs, getSessionLengthMs, isWaitingRoomEnabled, } from './config' @@ -23,7 +24,7 @@ let state: AdmissionState | null = null const SNAPSHOT_EVERY_N_TICKS = 10 export interface AdmissionDeps { - sweepExpired: (now: Date) => Promise + sweepExpired: (now: Date, graceMs: number) => Promise countActive: (now: Date) => Promise queueDepth: () => Promise admitFromQueue: (params: { @@ -34,6 +35,7 @@ export interface AdmissionDeps { isFireworksAdmissible: () => boolean getMaxAdmitsPerTick: () => number getSessionLengthMs: () => number + getSessionGraceMs: () => number now?: () => Date } @@ -45,6 +47,7 @@ const defaultDeps: AdmissionDeps = { isFireworksAdmissible, getMaxAdmitsPerTick: () => MAX_ADMITS_PER_TICK, getSessionLengthMs, + getSessionGraceMs, } export interface AdmissionTickResult { @@ -73,7 +76,7 @@ export async function runAdmissionTick( deps: AdmissionDeps = defaultDeps, ): Promise { const now = (deps.now ?? (() => new Date()))() - const expired = await deps.sweepExpired(now) + const expired = await deps.sweepExpired(now, deps.getSessionGraceMs()) if (!deps.isFireworksAdmissible()) { const [active, depth] = await Promise.all([ diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index e41f2a63cb..23302f0bd0 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -24,3 +24,11 @@ export function isWaitingRoomEnabled(): boolean { export function getSessionLengthMs(): number { return env.FREEBUFF_SESSION_LENGTH_MS } + +/** Drain window after a session's `expires_at`. During this window the gate + * still admits requests so an in-flight agent run can finish, but the CLI is + * expected to stop accepting new user prompts. Hard cutoff at + * `expires_at + grace`; past that the gate returns `session_expired`. */ +export function getSessionGraceMs(): number { + return env.FREEBUFF_SESSION_GRACE_MS +} diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index 7e345dd264..317e4c03da 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -1,6 +1,7 @@ import { ADMISSION_TICK_MS, MAX_ADMITS_PER_TICK, + getSessionGraceMs, isWaitingRoomEnabled, } from './config' import { @@ -23,6 +24,7 @@ export interface SessionDeps { isWaitingRoomEnabled: () => boolean getAdmissionTickMs: () => number getMaxAdmitsPerTick: () => number + getSessionGraceMs: () => number now?: () => Date } @@ -35,6 +37,7 @@ const defaultDeps: SessionDeps = { isWaitingRoomEnabled, getAdmissionTickMs: () => ADMISSION_TICK_MS, getMaxAdmitsPerTick: () => MAX_ADMITS_PER_TICK, + getSessionGraceMs, } const nowOf = (deps: SessionDeps): Date => (deps.now ?? (() => new Date()))() @@ -57,6 +60,7 @@ async function viewForRow( queueDepth: depth, admissionTickMs: deps.getAdmissionTickMs(), maxAdmitsPerTick: deps.getMaxAdmitsPerTick(), + graceMs: deps.getSessionGraceMs(), now: nowOf(deps), }) } @@ -117,6 +121,12 @@ export async function endUserSession(params: { export type SessionGateResult = | { ok: true; reason: 'disabled' } | { ok: true; reason: 'active'; remainingMs: number } + | { + ok: true + reason: 'draining' + /** Time remaining until the hard cutoff (`expires_at + grace`). */ + gracePeriodRemainingMs: number + } | { ok: false; code: 'waiting_room_required'; message: string } | { ok: false; code: 'waiting_room_queued'; message: string } | { ok: false; code: 'session_superseded'; message: string } @@ -160,7 +170,13 @@ export async function checkSessionAdmissible(params: { } const now = nowOf(deps) - if (!row.expires_at || row.expires_at.getTime() <= now.getTime()) { + const nowMs = now.getTime() + const expiresAtMs = row.expires_at?.getTime() ?? 0 + const graceMs = deps.getSessionGraceMs() + // Past the hard cutoff (`expires_at + grace`). The grace window lets the CLI + // finish an in-flight agent run after the user's session ended; once it's + // gone, we fall back to the same re-queue flow as a regular expiry. + if (!row.expires_at || expiresAtMs + graceMs <= nowMs) { return { ok: false, code: 'session_expired', @@ -176,9 +192,19 @@ export async function checkSessionAdmissible(params: { } } + if (expiresAtMs > nowMs) { + return { + ok: true, + reason: 'active', + remainingMs: expiresAtMs - nowMs, + } + } + + // Inside the grace window: still admit so the agent can finish, but signal + // to the caller (and via metrics) that no new user prompts should arrive. return { ok: true, - reason: 'active', - remainingMs: row.expires_at.getTime() - now.getTime(), + reason: 'draining', + gracePeriodRemainingMs: expiresAtMs + graceMs - nowMs, } } diff --git a/web/src/server/free-session/session-view.ts b/web/src/server/free-session/session-view.ts index 61ad6f0c84..6abb99e785 100644 --- a/web/src/server/free-session/session-view.ts +++ b/web/src/server/free-session/session-view.ts @@ -11,18 +11,34 @@ export function toSessionStateResponse(params: { queueDepth: number admissionTickMs: number maxAdmitsPerTick: number + graceMs: number now: Date }): SessionStateResponse | null { - const { row, position, queueDepth, admissionTickMs, maxAdmitsPerTick, now } = params + const { row, position, queueDepth, admissionTickMs, maxAdmitsPerTick, graceMs, now } = params if (!row) return null - if (row.status === 'active' && row.expires_at && row.expires_at.getTime() > now.getTime()) { - return { - status: 'active', - instanceId: row.active_instance_id, - admittedAt: (row.admitted_at ?? row.created_at).toISOString(), - expiresAt: row.expires_at.toISOString(), - remainingMs: row.expires_at.getTime() - now.getTime(), + if (row.status === 'active' && row.expires_at) { + const expiresAtMs = row.expires_at.getTime() + const nowMs = now.getTime() + if (expiresAtMs > nowMs) { + return { + status: 'active', + instanceId: row.active_instance_id, + admittedAt: (row.admitted_at ?? row.created_at).toISOString(), + expiresAt: row.expires_at.toISOString(), + remainingMs: expiresAtMs - nowMs, + } + } + const graceEndsMs = expiresAtMs + graceMs + if (graceEndsMs > nowMs) { + return { + status: 'draining', + instanceId: row.active_instance_id, + admittedAt: (row.admitted_at ?? row.created_at).toISOString(), + expiresAt: row.expires_at.toISOString(), + gracePeriodEndsAt: new Date(graceEndsMs).toISOString(), + gracePeriodRemainingMs: graceEndsMs - nowMs, + } } } @@ -41,7 +57,7 @@ export function toSessionStateResponse(params: { } } - // expired active — callers should treat as "no session" and re-queue + // active row past the grace window — callers should treat as "no session" and re-queue return null } diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts index 332d145c8e..a2622cd321 100644 --- a/web/src/server/free-session/store.ts +++ b/web/src/server/free-session/store.ts @@ -163,14 +163,19 @@ export async function queuePositionFor(params: { return Number(rows[0]?.n ?? 0) } -/** Remove rows whose active session has expired. Safe to call repeatedly. */ -export async function sweepExpired(now: Date): Promise { +/** + * Remove rows whose active session has expired past the drain grace window. + * Rows whose `expires_at` is in the past but still inside `expires_at + grace` + * are kept so an in-flight agent run can finish. Safe to call repeatedly. + */ +export async function sweepExpired(now: Date, graceMs: number): Promise { + const cutoff = new Date(now.getTime() - graceMs) const deleted = await db .delete(schema.freeSession) .where( and( eq(schema.freeSession.status, 'active'), - lt(schema.freeSession.expires_at, now), + lt(schema.freeSession.expires_at, cutoff), ), ) .returning({ user_id: schema.freeSession.user_id }) diff --git a/web/src/server/free-session/types.ts b/web/src/server/free-session/types.ts index 858bd63100..1564021bdd 100644 --- a/web/src/server/free-session/types.ts +++ b/web/src/server/free-session/types.ts @@ -23,6 +23,18 @@ export type SessionStateResponse = expiresAt: string remainingMs: number } + | { + /** Session is past `expiresAt` but still inside the grace window — the + * CLI must stop accepting new prompts but may finish any in-flight + * agent run. Hard cutoff at `gracePeriodEndsAt`; past that the gate + * rejects with `session_expired`. */ + status: 'draining' + instanceId: string + admittedAt: string + expiresAt: string + gracePeriodEndsAt: string + gracePeriodRemainingMs: number + } export interface InternalSessionRow { user_id: string From febb2630d9ee76c605d765d8bc4f5adc59072f6d Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 17:47:51 -0700 Subject: [PATCH 22/31] Add freebuff session-end banner and drain-window handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the server flips the seat into `draining` (past `expires_at`) or past the hard cutoff, the chat input is replaced by a "Session ended" banner. While an agent is still streaming under the grace window, Enter is disabled and the banner shows "Agent is wrapping up. Rejoin the wait room after it's finished." — Esc still interrupts. Once idle, Enter re-POSTs /session to drop back into the waiting room. Adds a small countdown to the far right of the status bar (muted, turning soft warning in the final minute — no red) and schedules the next poll just after expires_at / gracePeriodEndsAt so the draining transition shows up promptly instead of stalling at 0 for a full interval. Moves getFreebuffInstanceId onto the session hook's module handle and deletes the now-vestigial freebuff-session-store. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/src/app.tsx | 5 + cli/src/chat.tsx | 20 ++- .../components/freebuff-session-countdown.tsx | 29 ++-- cli/src/components/session-ended-banner.tsx | 101 ++++++++++++ cli/src/components/status-bar.tsx | 9 +- .../helpers/__tests__/send-message.test.ts | 17 +- cli/src/hooks/helpers/send-message.ts | 15 +- cli/src/hooks/use-freebuff-session.ts | 149 ++++++++++++++---- cli/src/hooks/use-send-message.ts | 2 +- cli/src/state/freebuff-session-store.ts | 43 ----- cli/src/types/freebuff-session.ts | 20 +++ docs/freebuff-waiting-room.md | 44 +++++- 12 files changed, 337 insertions(+), 117 deletions(-) create mode 100644 cli/src/components/session-ended-banner.tsx delete mode 100644 cli/src/state/freebuff-session-store.ts diff --git a/cli/src/app.tsx b/cli/src/app.tsx index 7c4c631059..ae0cd8ea5a 100644 --- a/cli/src/app.tsx +++ b/cli/src/app.tsx @@ -375,6 +375,10 @@ const AuthedSurface = ({ // 'none' → server lost our row; hook is about to re-POST // Falling through to on 'none' would leave the user unable to send // any free-mode request until the next poll cycle. + // + // 'draining' and 'ended' deliberately fall through to : the agent + // may still be finishing work under the server-side grace period, and the + // chat surface itself swaps the input box for the session-ended banner. if ( IS_FREEBUFF && (session === null || @@ -401,6 +405,7 @@ const AuthedSurface = ({ initialMode={initialMode} gitRoot={gitRoot} onSwitchToGitRoot={onSwitchToGitRoot} + freebuffSession={session} /> ) } diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx index cb8b41f550..845af09f75 100644 --- a/cli/src/chat.tsx +++ b/cli/src/chat.tsx @@ -21,7 +21,7 @@ import { ReviewScreen } from './components/review-screen' import { MessageWithAgents } from './components/message-with-agents' import { areCreditsRestored } from './components/out-of-credits-banner' import { PendingBashMessage } from './components/pending-bash-message' -import { useHasActiveFreebuffSession } from './components/freebuff-session-countdown' +import { SessionEndedBanner } from './components/session-ended-banner' import { StatusBar } from './components/status-bar' import { TopBanner } from './components/top-banner' import { getSlashCommandsWithSkills } from './data/slash-commands' @@ -84,6 +84,7 @@ import { computeInputLayoutMetrics } from './utils/text-layout' import type { CommandResult } from './commands/command-registry' import type { MultilineInputHandle } from './components/multiline-input' import type { MatchedSlashCommand } from './hooks/use-suggestion-engine' +import type { FreebuffSessionResponse } from './types/freebuff-session' import type { User } from './utils/auth' import type { AgentMode } from './utils/constants' import type { FileTreeNode } from '@codebuff/common/util/file' @@ -106,6 +107,7 @@ export const Chat = ({ initialMode, gitRoot, onSwitchToGitRoot, + freebuffSession, }: { headerContent: React.ReactNode initialPrompt: string | null @@ -121,6 +123,7 @@ export const Chat = ({ initialMode?: AgentMode gitRoot?: string | null onSwitchToGitRoot?: () => void + freebuffSession: FreebuffSessionResponse | null }) => { const [forceFileOnlyMentions, setForceFileOnlyMentions] = useState(false) @@ -1338,7 +1341,12 @@ export const Chat = ({ return ` ${segments.join(' ')} ` }, [queuePreviewTitle, pausedQueueText]) - const hasActiveFreebuffSession = useHasActiveFreebuffSession() + const hasActiveFreebuffSession = + IS_FREEBUFF && freebuffSession?.status === 'active' + const isFreebuffSessionOver = + IS_FREEBUFF && + (freebuffSession?.status === 'draining' || + freebuffSession?.status === 'ended') const shouldShowStatusLine = !feedbackMode && (hasStatusIndicatorContent || @@ -1447,6 +1455,7 @@ export const Chat = ({ scrollToLatest={scrollToLatest} statusIndicatorState={statusIndicatorState} onStop={chatKeyboardHandlers.onInterruptStream} + freebuffSession={freebuffSession} /> )} @@ -1466,11 +1475,18 @@ export const Chat = ({ )} {reviewMode ? ( + // Review takes precedence over the session-ended banner: during a + // draining session the agent may still be asking to run tools, and + // those approvals must be reachable for the run to finish. + ) : isFreebuffSessionOver ? ( + ) : ( { if (ms <= 0) return 'expiring…' @@ -24,9 +24,10 @@ const formatRemaining = (ms: number): string => { * surprised when their seat is released. Returns null in non-freebuff * builds or when no active session exists — safe to always mount. */ -export const FreebuffSessionCountdown: React.FC = () => { +export const FreebuffSessionCountdown: React.FC<{ + session: FreebuffSessionResponse | null +}> = ({ session }) => { const theme = useTheme() - const session = useFreebuffSessionStore((s) => s.session) const expiresAtMs = session?.status === 'active' ? Date.parse(session.expiresAt) : null @@ -40,21 +41,9 @@ export const FreebuffSessionCountdown: React.FC = () => { if (!IS_FREEBUFF || !expiresAtMs) return null const remainingMs = expiresAtMs - now - const color = - remainingMs < CRITICAL_THRESHOLD_MS - ? theme.error - : remainingMs < LOW_THRESHOLD_MS - ? theme.warning - : theme.muted + // Muted until the final minute, then a soft warning — deliberately not + // `theme.error` so the countdown reads informational, not alarming. + const color = remainingMs < LOW_THRESHOLD_MS ? theme.warning : theme.muted return {formatRemaining(remainingMs)} } - -/** True when the freebuff session countdown will render non-null content. - * Used by the chat surface to keep the status bar visible while a - * session is active, even when there's no streaming/queue activity. */ -export const useHasActiveFreebuffSession = (): boolean => { - return useFreebuffSessionStore( - (s) => IS_FREEBUFF && s.session?.status === 'active', - ) -} diff --git a/cli/src/components/session-ended-banner.tsx b/cli/src/components/session-ended-banner.tsx new file mode 100644 index 0000000000..d1bd71dbd7 --- /dev/null +++ b/cli/src/components/session-ended-banner.tsx @@ -0,0 +1,101 @@ +import { TextAttributes } from '@opentui/core' +import { useKeyboard } from '@opentui/react' +import React, { useCallback, useState } from 'react' + +import { Button } from './button' +import { refreshFreebuffSession } from '../hooks/use-freebuff-session' +import { useTheme } from '../hooks/use-theme' +import { useChatStore } from '../state/chat-store' +import { BORDER_CHARS } from '../utils/ui-constants' + +import type { KeyEvent } from '@opentui/core' + +interface SessionEndedBannerProps { + /** True while an agent request is still streaming under the server-side + * grace window. Swaps the Enter-to-rejoin affordance for a "let it + * finish" hint so the user doesn't abort their in-flight work. */ + isStreaming: boolean +} + +/** + * Replaces the chat input when the freebuff session has ended (client state + * `draining` or `ended`). Captures Enter to re-queue the user; Esc keeps + * falling through to the global stream-interrupt handler so in-flight work + * can be cancelled. + */ +export const SessionEndedBanner: React.FC = ({ + isStreaming, +}) => { + const theme = useTheme() + const [rejoining, setRejoining] = useState(false) + + // While a request is still streaming, rejoin is disabled: it would + // unmount and abort the in-flight agent run. The promise is "we + // let the agent finish" — honoring that means Enter does nothing until + // the stream ends or the user hits Esc. + const canRejoin = !isStreaming && !rejoining + const rejoin = useCallback(() => { + if (!canRejoin) return + setRejoining(true) + // Once the POST lands, the hook flips status to 'queued' and app.tsx + // swaps us into , unmounting this banner. No need to + // clear `rejoining` on success — the component will be gone. + refreshFreebuffSession() + .then(() => { + // Wipe the prior conversation so the next admitted session starts + // with empty history instead of continuing the one that just ended. + useChatStore.getState().reset() + }) + .catch(() => setRejoining(false)) + }, [canRejoin]) + + useKeyboard( + useCallback( + (key: KeyEvent) => { + if (!canRejoin) return + if (key.name === 'return' || key.name === 'enter') { + key.preventDefault?.() + rejoin() + } + }, + [rejoin, canRejoin], + ), + ) + + return ( + + + Your freebuff session has ended. + + {isStreaming ? ( + + Agent is wrapping up. Rejoin the wait room after it's finished. + + ) : ( + + )} + + ) +} diff --git a/cli/src/components/status-bar.tsx b/cli/src/components/status-bar.tsx index e6a2a64e44..6468de73bf 100644 --- a/cli/src/components/status-bar.tsx +++ b/cli/src/components/status-bar.tsx @@ -7,6 +7,7 @@ import { StopButton } from './stop-button' import { useTheme } from '../hooks/use-theme' import { formatElapsedTime } from '../utils/format-elapsed-time' +import type { FreebuffSessionResponse } from '../types/freebuff-session' import type { StatusIndicatorState } from '../utils/status-indicator-state' @@ -18,6 +19,7 @@ interface StatusBarProps { scrollToLatest: () => void statusIndicatorState: StatusIndicatorState onStop?: () => void + freebuffSession: FreebuffSessionResponse | null } export const StatusBar = ({ @@ -26,6 +28,7 @@ export const StatusBar = ({ scrollToLatest, statusIndicatorState, onStop, + freebuffSession, }: StatusBarProps) => { const theme = useTheme() const [elapsedSeconds, setElapsedSeconds] = useState(0) @@ -170,12 +173,12 @@ export const StatusBar = ({ }} > {elapsedTimeContent} - - - {onStop && (statusIndicatorState.kind === 'waiting' || statusIndicatorState.kind === 'streaming') && ( )} + + + ) diff --git a/cli/src/hooks/helpers/__tests__/send-message.test.ts b/cli/src/hooks/helpers/__tests__/send-message.test.ts index 4f36bab721..375ed66ea4 100644 --- a/cli/src/hooks/helpers/__tests__/send-message.test.ts +++ b/cli/src/hooks/helpers/__tests__/send-message.test.ts @@ -1581,7 +1581,7 @@ describe('freebuff gate errors', () => { expect(messages[0].userError).toContain('Another freebuff CLI took over') }) - test('handleRunError maps 410 session_expired to the rejoining message', () => { + test('handleRunError suppresses the inline error for 410 session_expired (ended banner takes over)', () => { const messages = baseMessage() const updater = makeUpdater(messages) handleRunError({ @@ -1594,10 +1594,13 @@ describe('freebuff gate errors', () => { updateChainInProgress: () => {}, }) updater.flush() - expect(messages[0].userError).toContain('no longer active') + // New contract: the gate handler flips the session store into `ended` + // and the session-ended banner is the user-facing signal, so we do NOT + // also surface an inline userError inside the chat transcript. + expect(messages[0].userError).toBeUndefined() }) - test('handleRunError maps 428 waiting_room_required to the rejoining message', () => { + test('handleRunError suppresses the inline error for 428 waiting_room_required (ended banner takes over)', () => { const messages = baseMessage() const updater = makeUpdater(messages) handleRunError({ @@ -1610,7 +1613,7 @@ describe('freebuff gate errors', () => { updateChainInProgress: () => {}, }) updater.flush() - expect(messages[0].userError).toContain('no longer active') + expect(messages[0].userError).toBeUndefined() }) test('handleRunError maps 429 waiting_room_queued to the still-queued message', () => { @@ -1679,6 +1682,10 @@ describe('freebuff gate errors', () => { setHasReceivedPlanResponse: () => {}, }) updater.flush() - expect(messages[0].userError).toContain('no longer active') + // 410 is now handled by the ended banner, not an inline error. The + // assertion here just confirms routing happened via the gate handler + // (which swallows the userError) rather than the generic error path + // (which would set a userError from the message). + expect(messages[0].userError).toBeUndefined() }) }) diff --git a/cli/src/hooks/helpers/send-message.ts b/cli/src/hooks/helpers/send-message.ts index f85bd4b9af..3ed60e488c 100644 --- a/cli/src/hooks/helpers/send-message.ts +++ b/cli/src/hooks/helpers/send-message.ts @@ -1,6 +1,7 @@ import { getErrorObject } from '@codebuff/common/util/error' import { + markFreebuffSessionEnded, markFreebuffSessionSuperseded, refreshFreebuffSession, } from '../use-freebuff-session' @@ -507,14 +508,14 @@ function handleFreebuffGateError( updater: BatchedMessageUpdater, ) { switch (kind) { - case 'waiting_room_required': case 'session_expired': - updater.setError( - 'Your freebuff session is no longer active. Rejoining the waiting room…', - ) - // Re-POST asynchronously; UI flips back to the waiting room as soon as - // the store picks up status: 'queued'. - refreshFreebuffSession().catch(() => {}) + case 'waiting_room_required': + // Our seat is gone mid-chat. Flip to the client-only `ended` state + // instead of auto re-queuing: the Chat surface stays mounted so any + // in-flight agent work can finish under the server-side grace period, + // and the session-ended banner prompts the user to press Enter when + // they're ready to rejoin the waiting room. + markFreebuffSessionEnded() return case 'waiting_room_queued': updater.setError( diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index 103e2494e7..a649123155 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -1,7 +1,6 @@ import { env } from '@codebuff/common/env' -import { useEffect } from 'react' +import { useEffect, useState } from 'react' -import { useFreebuffSessionStore } from '../state/freebuff-session-store' import { getAuthTokenDetails } from '../utils/auth' import { IS_FREEBUFF } from '../utils/constants' import { logger } from '../utils/logger' @@ -54,7 +53,13 @@ async function callSession( * no session, server lost our row, or an active session expired. */ function nextMethod(current: FreebuffSessionResponse | null): 'POST' | 'GET' { - if (current?.status === 'queued' || current?.status === 'active') return 'GET' + if ( + current?.status === 'queued' || + current?.status === 'active' || + current?.status === 'draining' + ) { + return 'GET' + } return 'POST' } @@ -63,12 +68,29 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null { case 'queued': return POLL_INTERVAL_QUEUED_MS case 'active': - return POLL_INTERVAL_ACTIVE_MS + // Poll at the normal cadence, but ensure we land just after + // `expires_at` so the draining transition shows up promptly instead + // of leaving the countdown stuck at 0 for up to a full interval. + return Math.max( + 1_000, + Math.min(POLL_INTERVAL_ACTIVE_MS, next.remainingMs + 1_000), + ) + case 'draining': + // Same idea for the hard cutoff — schedule a poll just after + // `gracePeriodEndsAt` so we catch the transition to `none`/`ended`. + return Math.max( + 1_000, + Math.min( + POLL_INTERVAL_ACTIVE_MS, + next.gracePeriodRemainingMs + 1_000, + ), + ) case 'none': // Server lost our row / active session expired — POST again ASAP. return 0 case 'disabled': case 'superseded': + case 'ended': return null } } @@ -81,13 +103,16 @@ interface UseFreebuffSessionResult { interface RefreshHandle { refresh: (opts?: { forcePost?: boolean }) => Promise markSuperseded: () => void + markEnded: () => void + getSession: () => FreebuffSessionResponse | null } /** * Module-level handle to the active hook's poll driver. Set by the hook's * effect on mount; cleared on unmount. Lets external callers (e.g. the * chat-completions gate-error handler) request an immediate re-POST without - * re-plumbing a ref through the component tree. + * re-plumbing a ref through the component tree, and lets non-React code + * (send-message, DELETE on exit) read the current session. */ let activeRefreshHandle: RefreshHandle | null = null @@ -104,16 +129,29 @@ export async function refreshFreebuffSession(): Promise { } /** - * Flip the store into a terminal `superseded` state. Polling stops and the - * UI renders a dedicated "close the other CLI and restart" screen. Called - * after a 409 session_superseded so we don't silently fight the other - * instance for the seat. + * Flip into a terminal `superseded` state. Polling stops and the UI renders + * a dedicated "close the other CLI and restart" screen. Called after a 409 + * session_superseded so we don't silently fight the other instance for the + * seat. */ export function markFreebuffSessionSuperseded(): void { if (!IS_FREEBUFF) return activeRefreshHandle?.markSuperseded() } +/** + * Flip into a client-only `ended` state. Polling stops, the input box is + * hidden, and we wait for the user to press Enter to rejoin. Used both when + * a poll detects we transitioned `active → none` and when the chat gate + * returns 410 session_expired — in both cases, the agent may still be + * finishing an in-flight request under the server-side grace period, so we + * don't want to silently flip into the waiting room. + */ +export function markFreebuffSessionEnded(): void { + if (!IS_FREEBUFF) return + activeRefreshHandle?.markEnded() +} + /** * Best-effort DELETE of the caller's session row. Used by exit paths that * skip React unmount (process.exit on Ctrl+C) so the seat frees up quickly @@ -122,8 +160,13 @@ export function markFreebuffSessionSuperseded(): void { */ export async function endFreebuffSessionBestEffort(): Promise { if (!IS_FREEBUFF) return - const current = useFreebuffSessionStore.getState().session - if (!current || (current.status !== 'queued' && current.status !== 'active')) { + const current = activeRefreshHandle?.getSession() ?? null + if ( + !current || + (current.status !== 'queued' && + current.status !== 'active' && + current.status !== 'draining') + ) { return } const { token } = getAuthTokenDetails() @@ -135,6 +178,22 @@ export async function endFreebuffSessionBestEffort(): Promise { } } +/** Read the current instance id for outgoing chat requests. Includes + * `draining` so in-flight agent work can keep streaming during the + * server-side grace window. */ +export function getFreebuffInstanceId(): string | undefined { + const current = activeRefreshHandle?.getSession() ?? null + if (!current) return undefined + if ( + current.status === 'queued' || + current.status === 'active' || + current.status === 'draining' + ) { + return current.instanceId + } + return undefined +} + /** * Manages the freebuff waiting-room session lifecycle: * - POST on mount to join the queue / rotate instance id @@ -146,12 +205,12 @@ export async function endFreebuffSessionBestEffort(): Promise { * In non-freebuff builds the hook seeds `{ status: 'disabled' }` and exits. */ export function useFreebuffSession(): UseFreebuffSessionResult { - const session = useFreebuffSessionStore((s) => s.session) - const lastFetchError = useFreebuffSessionStore((s) => s.lastFetchError) + const [session, setSession] = useState(null) + const [error, setError] = useState(null) useEffect(() => { if (!IS_FREEBUFF) { - useFreebuffSessionStore.getState().setSession({ status: 'disabled' }) + setSession({ status: 'disabled' }) return } @@ -161,7 +220,7 @@ export function useFreebuffSession(): UseFreebuffSessionResult { {}, '[freebuff-session] No auth token; skipping waiting-room admission', ) - useFreebuffSessionStore.getState().setError('Not authenticated') + setError('Not authenticated') return } @@ -169,6 +228,13 @@ export function useFreebuffSession(): UseFreebuffSessionResult { let controller = new AbortController() let timer: ReturnType | null = null let previousStatus: FreebuffSessionResponse['status'] | null = null + let currentSession: FreebuffSessionResponse | null = null + + const applySession = (next: FreebuffSessionResponse) => { + currentSession = next + setSession(next) + setError(null) + } const clearTimer = () => { if (timer) { @@ -185,23 +251,39 @@ export function useFreebuffSession(): UseFreebuffSessionResult { const tick = async (opts: { forcePost?: boolean } = {}) => { if (cancelled) return - const current = useFreebuffSessionStore.getState().session - const method = opts.forcePost ? 'POST' : nextMethod(current) + const method = opts.forcePost ? 'POST' : nextMethod(currentSession) try { const next = await callSession(method, token, controller.signal) if (cancelled) return if (previousStatus === 'queued' && next.status === 'active') { playAdmissionSound() } + + // active/draining → none means we've passed the server's hard + // cutoff. Flip to the client-only `ended` state instead of following + // the usual 'none' re-POST path, so the chat surface stays mounted + // and the user gets a gentle Enter-to-rejoin prompt rather than a + // sudden yank into the waiting room. The normal drain path goes + // active → draining → ended; the `active → none` branch covers the + // edge case where a poll misses draining entirely. + if ( + (previousStatus === 'active' || previousStatus === 'draining') && + next.status === 'none' + ) { + previousStatus = 'ended' + applySession({ status: 'ended' }) + return + } + previousStatus = next.status - useFreebuffSessionStore.getState().setSession(next) + applySession(next) const delay = nextDelayMs(next) if (delay !== null) schedule(delay) - } catch (error) { + } catch (err) { if (cancelled || controller.signal.aborted) return - const msg = error instanceof Error ? error.message : String(error) + const msg = err instanceof Error ? err.message : String(err) logger.warn({ error: msg }, '[freebuff-session] fetch failed') - useFreebuffSessionStore.getState().setError(msg) + setError(msg) schedule(POLL_INTERVAL_ERROR_MS) } } @@ -226,8 +308,15 @@ export function useFreebuffSession(): UseFreebuffSessionResult { clearTimer() controller.abort() previousStatus = 'superseded' - useFreebuffSessionStore.getState().setSession({ status: 'superseded' }) + applySession({ status: 'superseded' }) }, + markEnded: () => { + clearTimer() + controller.abort() + previousStatus = 'ended' + applySession({ status: 'ended' }) + }, + getSession: () => currentSession, } return () => { @@ -238,19 +327,19 @@ export function useFreebuffSession(): UseFreebuffSessionResult { // Fire-and-forget DELETE. Only release if we actually held a slot so we // don't generate spurious DELETEs (e.g. HMR before POST completes). - const current = useFreebuffSessionStore.getState().session if ( - current && - (current.status === 'queued' || current.status === 'active') + currentSession && + (currentSession.status === 'queued' || + currentSession.status === 'active' || + currentSession.status === 'draining') ) { callSession('DELETE', token).catch(() => {}) } - useFreebuffSessionStore.getState().reset() + currentSession = null + setSession(null) + setError(null) } }, []) - return { - session, - error: lastFetchError, - } + return { session, error } } diff --git a/cli/src/hooks/use-send-message.ts b/cli/src/hooks/use-send-message.ts index 915692151c..03fc065c05 100644 --- a/cli/src/hooks/use-send-message.ts +++ b/cli/src/hooks/use-send-message.ts @@ -3,7 +3,7 @@ import { useCallback, useEffect, useRef } from 'react' import { setCurrentChatId } from '../project-files' import { createStreamController } from './stream-state' import { useChatStore } from '../state/chat-store' -import { getFreebuffInstanceId } from '../state/freebuff-session-store' +import { getFreebuffInstanceId } from './use-freebuff-session' import { getCodebuffClient } from '../utils/codebuff-client' import { AGENT_MODE_TO_ID, AGENT_MODE_TO_COST_MODE, IS_FREEBUFF } from '../utils/constants' import { createEventHandlerState } from '../utils/create-event-handler-state' diff --git a/cli/src/state/freebuff-session-store.ts b/cli/src/state/freebuff-session-store.ts deleted file mode 100644 index ad42fc0078..0000000000 --- a/cli/src/state/freebuff-session-store.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { create } from 'zustand' - -import type { FreebuffSessionResponse } from '../types/freebuff-session' - -/** - * Snapshot of the waiting-room / active-session state reported by the server. - * Stored globally so both the waiting-room UI and the send-message path can - * read the current instance id without prop drilling. - */ -interface FreebuffSessionState { - session: FreebuffSessionResponse | null - lastFetchError: string | null -} - -interface FreebuffSessionActions { - setSession: (session: FreebuffSessionResponse) => void - setError: (error: string | null) => void - reset: () => void -} - -type FreebuffSessionStore = FreebuffSessionState & FreebuffSessionActions - -const initialState: FreebuffSessionState = { - session: null, - lastFetchError: null, -} - -export const useFreebuffSessionStore = create((set) => ({ - ...initialState, - setSession: (session) => set({ session, lastFetchError: null }), - setError: (lastFetchError) => set({ lastFetchError }), - reset: () => set(initialState), -})) - -/** Read the current instance id for outgoing chat requests. */ -export const getFreebuffInstanceId = (): string | undefined => { - const { session } = useFreebuffSessionStore.getState() - if (!session) return undefined - if (session.status === 'queued' || session.status === 'active') { - return session.instanceId - } - return undefined -} diff --git a/cli/src/types/freebuff-session.ts b/cli/src/types/freebuff-session.ts index d384825ad5..528a078aa1 100644 --- a/cli/src/types/freebuff-session.ts +++ b/cli/src/types/freebuff-session.ts @@ -21,6 +21,18 @@ export type FreebuffSessionServerResponse = expiresAt: string remainingMs: number } + | { + /** Session is past `expiresAt` but still inside the server-side grace + * window. The CLI must stop accepting new prompts but may finish any + * in-flight agent run. Hard cutoff at `gracePeriodEndsAt`; past that + * the chat gate rejects with `session_expired`. */ + status: 'draining' + instanceId: string + admittedAt: string + expiresAt: string + gracePeriodEndsAt: string + gracePeriodRemainingMs: number + } /** * Client-only terminal state set when the server reports `session_superseded` @@ -29,5 +41,13 @@ export type FreebuffSessionServerResponse = export type FreebuffSessionResponse = | FreebuffSessionServerResponse | { status: 'superseded' } + /** + * Client-only fallback set when we lose the seat via a path that doesn't + * pass through `draining` — e.g. the chat gate returns 410 session_expired + * past the hard cutoff, or a poll goes straight from `active` to `none`. + * Same UX as `draining` (hidden input + Enter-to-rejoin banner) but with + * no grace countdown to display. + */ + | { status: 'ended' } export type FreebuffSessionStatus = FreebuffSessionResponse['status'] diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 47ab38b802..06b8ce8a67 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -5,7 +5,7 @@ The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployment. It has three jobs: 1. **Drip-admit users** — admit at a steady trickle (default 1 per 15s) so load ramps up gradually rather than stampeding the deployment when the queue is long. -2. **Gate on upstream health** — only admit new users while the Fireworks deployment is reporting `healthy` (via the separate monitor in `web/src/server/fireworks-monitor/`). Once metrics degrade, admission halts until they recover — this is the primary concurrency control, not a static cap. +2. **Gate on upstream health** — before each admission tick, probe the Fireworks metrics endpoint with a short timeout (`isFireworksAdmissible` in `web/src/server/free-session/admission.ts`). If it doesn't respond OK, admission halts until it does — this is the primary concurrency control, not a static cap. 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. Users who cannot be admitted immediately are placed in a FIFO queue and given an estimated wait time. Admitted users get a fixed-length session (default 1h) during which they can make free-mode requests subject to the existing per-user rate limits. @@ -20,6 +20,7 @@ FREEBUFF_WAITING_ROOM_ENABLED=false # Other knobs (only read when enabled) FREEBUFF_SESSION_LENGTH_MS=3600000 # 1 hour +FREEBUFF_SESSION_GRACE_MS=1800000 # 30 min — drain window after expiry ``` Flipping the flag is safe at runtime: existing rows stay in the DB and will be admitted / expired correctly whenever the flag is flipped back on. @@ -90,13 +91,19 @@ Migration: `packages/internal/src/db/migrations/0043_vengeful_boomer.sql`. stateDiagram-v2 [*] --> queued: POST /session
(first call) queued --> active: admission tick
(capacity + healthy) - active --> expired: expires_at < now() + active --> draining: expires_at < now()
(grace window) + draining --> expired: expires_at + grace < now() expired --> queued: POST /session
(re-queue at back) queued --> [*]: DELETE /session active --> [*]: DELETE /session
or admission sweep + draining --> [*]: DELETE /session
or admission sweep ``` -There is no stored `expired` status. An `active` row whose `expires_at` is in the past is treated as expired by `checkSessionAdmissible` and swept by the admission ticker. +Neither `draining` nor `expired` is a stored status — they are derived from `expires_at` versus `now()` and the grace window: + +- `expires_at > now()` → `active` (gate: `ok: 'active'`) +- `expires_at <= now() < expires_at + grace` → `draining` (gate: `ok: 'draining'`; client must stop accepting new prompts but can let an in-flight agent finish) +- `expires_at + grace <= now()` → `expired` (gate: `session_expired`); swept by the admission ticker ## Single-instance Enforcement @@ -135,6 +142,7 @@ Each tick does (in order): | `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires | | `MAX_ADMITS_PER_TICK` | `config.ts` | 1 | Upper bound on admits per tick | | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | +| `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | ## HTTP API @@ -173,6 +181,17 @@ Response shapes: "expiresAt": "2026-04-17T13:00:00Z", "remainingMs": 3600000 } + +// Past expiresAt but inside the grace window — agent in flight may finish, +// CLI must not accept new user prompts. +{ + "status": "draining", + "instanceId": "e47…", + "admittedAt": "2026-04-17T12:00:00Z", + "expiresAt": "2026-04-17T13:00:00Z", + "gracePeriodEndsAt": "2026-04-17T13:30:00Z", + "gracePeriodRemainingMs": 1800000 +} ``` ### `GET /api/v1/freebuff/session` @@ -201,10 +220,23 @@ For free-mode requests (`codebuff_metadata.cost_mode === 'free'`), `_post.ts` ca | 428 | `waiting_room_required` | No session row exists. Client should call POST /session. | | 429 | `waiting_room_queued` | Row exists with `status='queued'`. Client should keep polling GET. | | 409 | `session_superseded` | Claimed `instance_id` does not match stored one — another CLI took over. | -| 410 | `session_expired` | Row exists with `status='active'` but `expires_at < now()`. Client should POST /session to re-queue. | +| 410 | `session_expired` | `expires_at + grace < now()` (past the hard cutoff). Client should POST /session to re-queue. | + +Successful results carry one of three reasons: `disabled` (gate is off), `active` (`expires_at > now()`, `remainingMs` provided), or `draining` (`expires_at <= now() < expires_at + grace`, `gracePeriodRemainingMs` provided). The CLI should treat `draining` as "let any in-flight agent run finish, but block new user prompts" — see [Drain / Grace Window](#drain--grace-window) below. When the waiting room is disabled, the gate returns `{ ok: true, reason: 'disabled' }` without touching the DB. +## Drain / Grace Window + +We don't want to kill an agent mid-run just because the user's session ticked over. After `expires_at`, the row enters a `draining` state for `FREEBUFF_SESSION_GRACE_MS` (default 30 min). During the drain window: + +- `checkSessionAdmissible` returns `{ ok: true, reason: 'draining', gracePeriodRemainingMs }` — chat completions still go through. +- `getSessionState` / `requestSession` return `status: 'draining'` so the CLI can render a "session ending — agent finishing up" indicator and disable the input box. +- `sweepExpired` skips the row, keeping it in the DB so the gate keeps working. +- `joinOrTakeOver` still treats the row as expired (`expires_at <= now()`), so a fresh POST re-queues at the back of the line. This means starting a new CLI during the drain window cleanly hands off to a queued seat rather than extending the current one. + +This is a **trust-the-client** design: the server still admits requests during the drain window, and we rely on the CLI to stop submitting new user prompts at `expires_at`. The 30-min hard cutoff caps the abuse surface — a malicious client that ignores the contract can extend a session by at most one grace window per expiry. + ## Estimated Wait Time Computed in `session-view.ts` from the drip-admission rate: @@ -247,6 +279,7 @@ The `disabled` response means the server has the waiting room turned off. CLI sh | Attack | Mitigation | |---|---| +| CLI keeps submitting new prompts past `expires_at` | Trusted client; bounded by 30-min hard cutoff at `expires_at + grace`. After that the gate returns `session_expired` and the user must re-queue. | | Multiple sessions per account | PK on `user_id` — structurally impossible | | Multiple CLIs sharing one session | `active_instance_id` rotates on POST; stale id → 409 | | Client-forged timestamps | All timestamps server-supplied (`DEFAULT now()` or explicit) | @@ -254,8 +287,7 @@ The `disabled` response means the server has the waiting room turned off. CLI sh | Repeatedly calling POST to reset queue position | POST preserves `queued_at` for already-queued users | | Two pods admitting the same user | `SELECT ... FOR UPDATE SKIP LOCKED` + advisory xact lock | | Spamming POST/GET to starve admission tick | Admission uses Postgres advisory lock; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. | -| Low-traffic error-fraction flapping blocking admissions | Health monitor has `minRequestRateForErrorCheck` floor (see `fireworks-monitor`) | -| Monitor down / metrics stale | `isFireworksAdmissible()` fails closed → admission pauses, queue grows | +| Fireworks metrics endpoint down / slow | `isFireworksAdmissible()` fails closed (timeout or non-OK) → admission pauses, queue grows | | Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy | ## Testing From 0204a371e82845fa298d0e668f299fd67f2daf64 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 17:51:24 -0700 Subject: [PATCH 23/31] Replace Fireworks Prometheus monitor with reachability probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete the 1.5k-LOC fireworks-monitor package (Prometheus scrape, health computation, admin endpoint, CLI scripts) in favor of a single-function reachability probe inline in free-session/admission.ts: GET the account metrics endpoint with a 5s timeout and fail closed on non-OK. The full-health-scoring machinery was load-bearing on nothing — admission only ever read the boolean gate, and reachability is what actually matters for halting during an outage. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/freebuff-waiting-room.md | 10 +- scripts/check-fireworks-health.ts | 142 -------- web/instrumentation.ts | 3 - web/scripts/scrape-check.ts | 54 --- .../__tests__/fireworks-health.test.ts | 66 ---- .../app/api/admin/fireworks-health/_get.ts | 22 -- .../app/api/admin/fireworks-health/route.ts | 11 - .../__tests__/compute-health.test.ts | 291 ---------------- .../__tests__/monitor.test.ts | 189 ----------- .../__tests__/parse-prometheus.test.ts | 116 ------- .../fireworks-monitor/compute-health.ts | 294 ---------------- web/src/server/fireworks-monitor/monitor.ts | 316 ------------------ .../fireworks-monitor/parse-prometheus.ts | 147 -------- web/src/server/fireworks-monitor/types.ts | 41 --- .../free-session/__tests__/admission.test.ts | 6 +- web/src/server/free-session/admission.ts | 45 ++- 16 files changed, 46 insertions(+), 1707 deletions(-) delete mode 100644 scripts/check-fireworks-health.ts delete mode 100644 web/scripts/scrape-check.ts delete mode 100644 web/src/app/api/admin/fireworks-health/__tests__/fireworks-health.test.ts delete mode 100644 web/src/app/api/admin/fireworks-health/_get.ts delete mode 100644 web/src/app/api/admin/fireworks-health/route.ts delete mode 100644 web/src/server/fireworks-monitor/__tests__/compute-health.test.ts delete mode 100644 web/src/server/fireworks-monitor/__tests__/monitor.test.ts delete mode 100644 web/src/server/fireworks-monitor/__tests__/parse-prometheus.test.ts delete mode 100644 web/src/server/fireworks-monitor/compute-health.ts delete mode 100644 web/src/server/fireworks-monitor/monitor.ts delete mode 100644 web/src/server/fireworks-monitor/parse-prometheus.ts delete mode 100644 web/src/server/fireworks-monitor/types.ts diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 06b8ce8a67..91bfb190ca 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -35,7 +35,7 @@ flowchart LR Gate[checkSessionAdmissible] Ticker[Admission Ticker
every 5s, 1 pod] Store[(free_session
Postgres)] - Monitor[FireworksMonitor
isFireworksAdmissible] + Probe[isFireworksAdmissible
Fireworks metrics GET] CLI -- "POST on startup
(gets instance_id)" --> SessionAPI CLI -- "GET to poll state" --> SessionAPI @@ -44,7 +44,7 @@ flowchart LR ChatAPI --> Gate Gate --> Store Ticker --> Store - Ticker --> Monitor + Ticker --> Probe ``` ### Components @@ -123,7 +123,7 @@ The rotation is important: it happens even if the caller is already in the `acti ### What this does NOT prevent - A single user manually syncing `instance_id` between two CLIs (e.g. editing a config file). This is possible but requires them to re-sync after every startup call, so it's high-friction. We accept this. -- A user creating multiple accounts. That is covered by other gates (MIN_ACCOUNT_AGE_FOR_PAID_MS, geo check) and the Fireworks monitor's overall throttle. +- A user creating multiple accounts. That is covered by other gates (MIN_ACCOUNT_AGE_FOR_PAID_MS, geo check) and the overall drip-admission rate. ## Admission Loop @@ -132,8 +132,8 @@ One pod runs the admission loop at a time, coordinated via Postgres advisory loc Each tick does (in order): 1. **Sweep expired.** `DELETE FROM free_session WHERE status='active' AND expires_at < now()`. Runs regardless of upstream health so zombie sessions are cleaned up even during an outage. -2. **Check upstream health.** `isFireworksAdmissible()` from the monitor. If not `healthy`, skip admission for this tick (queue grows; users see `status: 'queued'` with increasing position). -3. **Admit.** `SELECT ... WHERE status='queued' ORDER BY queued_at, user_id LIMIT MAX_ADMITS_PER_TICK FOR UPDATE SKIP LOCKED`, then `UPDATE` those rows to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. Staggering the queue at `MAX_ADMITS_PER_TICK=1` / 15s keeps Fireworks from getting hit by a thundering herd of newly-admitted CLIs; once metrics show the deployment is saturated, step 2 halts further admissions. +2. **Check upstream reachability.** `isFireworksAdmissible()` does a short-timeout GET against the Fireworks account metrics endpoint. If it doesn't respond OK, skip admission for this tick (queue grows; users see `status: 'queued'` with increasing position). +3. **Admit.** `SELECT ... WHERE status='queued' ORDER BY queued_at, user_id LIMIT MAX_ADMITS_PER_TICK FOR UPDATE SKIP LOCKED`, then `UPDATE` those rows to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. Staggering the queue at `MAX_ADMITS_PER_TICK=1` / 15s keeps Fireworks from getting hit by a thundering herd of newly-admitted CLIs; if the probe starts failing, step 2 halts further admissions. ### Tunables diff --git a/scripts/check-fireworks-health.ts b/scripts/check-fireworks-health.ts deleted file mode 100644 index 6d51ab9d46..0000000000 --- a/scripts/check-fireworks-health.ts +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env bun - -/** - * Scrape Fireworks metrics once and print the health snapshot the - * web server's monitor would produce. Useful for ad-hoc verification. - * - * Usage: - * bun scripts/check-fireworks-health.ts - * bun scripts/check-fireworks-health.ts --raw # also print raw metrics count - * bun scripts/check-fireworks-health.ts --json # machine-readable output - * - * Reads FIREWORKS_API_KEY from env (.env.local is loaded automatically by bun). - */ - -import { computeSnapshot, DEFAULT_HEALTH_THRESHOLDS } from '../web/src/server/fireworks-monitor/compute-health' -import { parsePrometheusText } from '../web/src/server/fireworks-monitor/parse-prometheus' -import { - FIREWORKS_ACCOUNT_ID, - FIREWORKS_DEPLOYMENT_MAP, -} from '../web/src/llm-api/fireworks-config' - -import type { DeploymentHealthStatus } from '../web/src/server/fireworks-monitor/types' - -const METRICS_URL = (accountId: string) => - `https://api.fireworks.ai/v1/accounts/${accountId}/metrics` - -async function scrapeFireworksMetrics(params: { apiKey: string; accountId: string }) { - const response = await fetch(METRICS_URL(params.accountId), { - headers: { Authorization: `Bearer ${params.apiKey}` }, - }) - if (!response.ok) { - const body = await response.text().catch(() => '') - throw new Error( - `Fireworks metrics scrape failed: ${response.status} ${response.statusText}${body ? ` — ${body.slice(0, 300)}` : ''}`, - ) - } - const text = await response.text() - return parsePrometheusText(text) -} - -const STATUS_COLORS: Record = { - healthy: '\x1b[32m', - degraded: '\x1b[33m', - unhealthy: '\x1b[31m', - unknown: '\x1b[90m', -} -const RESET = '\x1b[0m' - -function formatMs(value: number | null): string { - if (value === null) return 'n/a' - if (value >= 1000) return `${(value / 1000).toFixed(2)}s` - return `${Math.round(value)}ms` -} - -function formatPct(value: number, digits = 1): string { - return `${(value * 100).toFixed(digits)}%` -} - -async function main() { - const args = process.argv.slice(2) - const jsonMode = args.includes('--json') - const showRaw = args.includes('--raw') - - const apiKey = process.env.FIREWORKS_API_KEY - if (!apiKey) { - console.error('❌ FIREWORKS_API_KEY is not set. Add it to .env.local or export it.') - process.exit(1) - } - - const accountId = process.env.FIREWORKS_ACCOUNT_ID ?? FIREWORKS_ACCOUNT_ID - const deployments = Object.values(FIREWORKS_DEPLOYMENT_MAP) - - const scrapeStart = Date.now() - let metrics - try { - metrics = await scrapeFireworksMetrics({ apiKey, accountId }) - } catch (error) { - console.error('❌ Scrape failed:', error instanceof Error ? error.message : error) - process.exit(1) - } - const scrapeElapsedMs = Date.now() - scrapeStart - - const snapshot = computeSnapshot({ - metrics, - deployments, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - - if (jsonMode) { - console.log(JSON.stringify({ scrapeElapsedMs, sampleCount: metrics.samples.length, snapshot }, null, 2)) - return - } - - console.log('🔥 Fireworks Deployment Health') - console.log('='.repeat(78)) - console.log(`Account: accounts/${accountId}`) - console.log(`Scraped in: ${scrapeElapsedMs}ms`) - console.log(`Samples: ${metrics.samples.length}`) - console.log(`Overall: ${STATUS_COLORS[snapshot.overall]}${snapshot.overall.toUpperCase()}${RESET}`) - if (snapshot.lastError) console.log(`Last error: ${snapshot.lastError}`) - console.log() - - const modelByDeployment = Object.fromEntries( - Object.entries(FIREWORKS_DEPLOYMENT_MAP).map(([model, dep]) => [dep, model]), - ) - - for (const [deployment, health] of Object.entries(snapshot.deployments)) { - const model = modelByDeployment[deployment] ?? '(unknown model)' - const color = STATUS_COLORS[health.status] - console.log(`── ${color}${health.status.toUpperCase().padEnd(9)}${RESET} ${model}`) - console.log(` deployment: ${deployment}`) - console.log(` base model: ${health.baseModel ?? 'n/a'}`) - console.log(` replicas: ${health.metrics.replicas ?? 'n/a'}`) - console.log(` request rate: ${health.metrics.requestRate.toFixed(3)} req/s`) - console.log(` error rate: ${health.metrics.errorRate.toFixed(3)} err/s (${formatPct(health.metrics.errorFraction)})`) - console.log(` concurrent requests: ${health.metrics.concurrentRequests.toFixed(2)}`) - console.log(` KV blocks utilization: ${formatPct(health.metrics.kvBlocksFraction, 0)}`) - console.log(` KV slots utilization: ${formatPct(health.metrics.kvSlotsFraction, 0)}`) - console.log(` p50 queue wait: ${formatMs(health.metrics.p50GenerationQueueMs)}`) - console.log(` p50 TTFT: ${formatMs(health.metrics.p50TimeToFirstTokenMs)}`) - if (health.reasons.length > 0) { - console.log(` reasons: ${health.reasons.join('; ')}`) - } - console.log() - } - - if (showRaw) { - console.log('── Metric name breakdown ─────────────────────────────') - const counts = new Map() - for (const s of metrics.samples) { - counts.set(s.name, (counts.get(s.name) ?? 0) + 1) - } - const sorted = [...counts.entries()].sort((a, b) => b[1] - a[1]) - for (const [name, count] of sorted) { - console.log(` ${String(count).padStart(4)} ${name}`) - } - } - - process.exit(snapshot.overall === 'unhealthy' ? 2 : 0) -} - -main() diff --git a/web/instrumentation.ts b/web/instrumentation.ts index 6dbcf3eaa5..422a11c9e0 100644 --- a/web/instrumentation.ts +++ b/web/instrumentation.ts @@ -8,7 +8,6 @@ * causing Render's proxy to return 502 Bad Gateway errors. */ -import { startFireworksMonitor } from '@/server/fireworks-monitor/monitor' import { logger } from '@/util/logger' export async function register() { @@ -47,8 +46,6 @@ export async function register() { logger.info({}, '[Instrumentation] Global error handlers registered') - startFireworksMonitor() - // DB-touching admission module uses `postgres`, which imports Node built-ins // like `crypto`. Gate on NEXT_RUNTIME so the edge bundle doesn't try to // resolve them. diff --git a/web/scripts/scrape-check.ts b/web/scripts/scrape-check.ts deleted file mode 100644 index d4b863135b..0000000000 --- a/web/scripts/scrape-check.ts +++ /dev/null @@ -1,54 +0,0 @@ -/** - * One-off: scrape Fireworks metrics for each configured deployment and print - * the same health summary the admission gate would see. - * - * Usage: - * bun run web/scripts/scrape-check.ts - */ - -import { env } from '@codebuff/internal/env' - -import { computeSnapshot, DEFAULT_HEALTH_THRESHOLDS } from '@/server/fireworks-monitor/compute-health' -import { scrapeFireworksMetrics } from '@/server/fireworks-monitor/monitor' -import { FIREWORKS_ACCOUNT_ID, FIREWORKS_DEPLOYMENT_MAP } from '@/llm-api/fireworks-config' - -async function main() { - const deployments = Object.values(FIREWORKS_DEPLOYMENT_MAP) - const metrics = await scrapeFireworksMetrics({ - apiKey: env.FIREWORKS_API_KEY, - accountId: FIREWORKS_ACCOUNT_ID, - }) - const snapshot = computeSnapshot({ - metrics, - deployments, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - - console.log(`scrapedAt: ${new Date(snapshot.scrapedAt ?? 0).toISOString()}`) - console.log(`overall: ${snapshot.overall}\n`) - - for (const [deployment, health] of Object.entries(snapshot.deployments)) { - console.log(`── ${deployment} (${health.baseModel ?? 'unknown'})`) - console.log(` status: ${health.status}`) - console.log(` replicas: ${health.metrics.replicas}`) - console.log(` req/s: ${health.metrics.requestRate.toFixed(2)}`) - console.log(` errors: ${(health.metrics.errorFraction * 100).toFixed(2)}%`) - console.log(` kvBlocks: ${(health.metrics.kvBlocksFraction * 100).toFixed(1)}%`) - console.log(` kvSlots: ${(health.metrics.kvSlotsFraction * 100).toFixed(1)}%`) - console.log(` concurrent: ${health.metrics.concurrentRequests.toFixed(1)}`) - const q = health.metrics.p50GenerationQueueMs - const t = health.metrics.p50TimeToFirstTokenMs - console.log(` p50 queue: ${q === null ? 'n/a' : `${Math.round(q)}ms`}`) - console.log(` p50 TTFT: ${t === null ? 'n/a' : `${Math.round(t)}ms`}`) - if (health.reasons.length > 0) { - console.log(` reasons:`) - for (const r of health.reasons) console.log(` - ${r}`) - } - console.log() - } -} - -void main().catch((error) => { - console.error(error) - process.exit(1) -}) diff --git a/web/src/app/api/admin/fireworks-health/__tests__/fireworks-health.test.ts b/web/src/app/api/admin/fireworks-health/__tests__/fireworks-health.test.ts deleted file mode 100644 index 7cf42b10f5..0000000000 --- a/web/src/app/api/admin/fireworks-health/__tests__/fireworks-health.test.ts +++ /dev/null @@ -1,66 +0,0 @@ -import { describe, expect, test } from 'bun:test' -import { NextResponse } from 'next/server' - -import { getFireworksHealth } from '../_get' - -import type { FireworksHealthSnapshot } from '@/server/fireworks-monitor/types' - -function snapshot( - overall: FireworksHealthSnapshot['overall'], -): FireworksHealthSnapshot { - return { - scrapedAt: 1000, - ageMs: 0, - overall, - deployments: {}, - lastError: null, - } -} - -const allowAdmin = async () => ({ id: 'admin-user', email: 'admin@example.com' }) -const forbidAdmin = async () => - NextResponse.json({ error: 'Forbidden - not an admin' }, { status: 403 }) - -describe('/api/admin/fireworks-health', () => { - test('returns 403 when caller is not an admin', async () => { - const response = await getFireworksHealth({ - getSnapshot: () => snapshot('healthy'), - checkAdminAuth: forbidAdmin, - }) - expect(response.status).toBe(403) - }) - - test('returns 200 with snapshot when overall is healthy', async () => { - const response = await getFireworksHealth({ - getSnapshot: () => snapshot('healthy'), - checkAdminAuth: allowAdmin, - }) - expect(response.status).toBe(200) - const body = await response.json() - expect(body.overall).toBe('healthy') - }) - - test('returns 200 when degraded', async () => { - const response = await getFireworksHealth({ - getSnapshot: () => snapshot('degraded'), - checkAdminAuth: allowAdmin, - }) - expect(response.status).toBe(200) - }) - - test('returns 200 when unknown (no scrape yet)', async () => { - const response = await getFireworksHealth({ - getSnapshot: () => snapshot('unknown'), - checkAdminAuth: allowAdmin, - }) - expect(response.status).toBe(200) - }) - - test('returns 503 when overall is unhealthy', async () => { - const response = await getFireworksHealth({ - getSnapshot: () => snapshot('unhealthy'), - checkAdminAuth: allowAdmin, - }) - expect(response.status).toBe(503) - }) -}) diff --git a/web/src/app/api/admin/fireworks-health/_get.ts b/web/src/app/api/admin/fireworks-health/_get.ts deleted file mode 100644 index 1b40b5cb41..0000000000 --- a/web/src/app/api/admin/fireworks-health/_get.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { NextResponse } from 'next/server' - -import type { FireworksHealthSnapshot } from '@/server/fireworks-monitor/types' - -export interface FireworksHealthDeps { - getSnapshot: () => FireworksHealthSnapshot - checkAdminAuth: () => Promise -} - -export async function getFireworksHealth({ - getSnapshot, - checkAdminAuth, -}: FireworksHealthDeps) { - const authResult = await checkAdminAuth() - if (authResult instanceof NextResponse) { - return authResult - } - - const snapshot = getSnapshot() - const httpStatus = snapshot.overall === 'unhealthy' ? 503 : 200 - return NextResponse.json(snapshot, { status: httpStatus }) -} diff --git a/web/src/app/api/admin/fireworks-health/route.ts b/web/src/app/api/admin/fireworks-health/route.ts deleted file mode 100644 index 2307c4398e..0000000000 --- a/web/src/app/api/admin/fireworks-health/route.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { getFireworksHealth } from './_get' - -import { checkAdminAuth } from '@/lib/admin-auth' -import { getFireworksHealthSnapshot } from '@/server/fireworks-monitor/monitor' - -export const GET = () => { - return getFireworksHealth({ - getSnapshot: getFireworksHealthSnapshot, - checkAdminAuth, - }) -} diff --git a/web/src/server/fireworks-monitor/__tests__/compute-health.test.ts b/web/src/server/fireworks-monitor/__tests__/compute-health.test.ts deleted file mode 100644 index d62dab938e..0000000000 --- a/web/src/server/fireworks-monitor/__tests__/compute-health.test.ts +++ /dev/null @@ -1,291 +0,0 @@ -import { describe, expect, test } from 'bun:test' - -import { - computeDeploymentHealth, - computeSnapshot, - DEFAULT_HEALTH_THRESHOLDS, -} from '../compute-health' -import { parsePrometheusText } from '../parse-prometheus' - -const DEPLOYMENT = 'accounts/test-acc/deployments/d1' - -function fixture(params: { - requestRate?: number - errorRate?: number - errorCode?: string - concurrent?: number - kvBlocks?: number - kvSlots?: number - queueBuckets?: Array<{ le: string; count: number }> - ttftBuckets?: Array<{ le: string; count: number }> - /** deployment_replicas gauge. Defaults to 1 so existing tests stay healthy. - * Set to 0 or null to simulate a cold/deleted deployment. */ - replicas?: number | null -}): string { - const lines: string[] = [] - const labels = `base_model="m",deployment="${DEPLOYMENT}",deployment_account="test-acc",deployment_id="d1"` - const replicas = params.replicas === undefined ? 1 : params.replicas - if (replicas !== null) { - lines.push( - `deployment_replicas{deployment_account="test-acc",deployment_id="d1"} ${replicas}`, - ) - } - if (params.requestRate !== undefined) { - lines.push(`request_counter_total:sum_by_deployment{${labels}} ${params.requestRate}`) - } - if (params.errorRate !== undefined) { - const code = params.errorCode ?? '500' - lines.push( - `requests_error_total:sum_by_deployment{${labels},http_code="${code}"} ${params.errorRate}`, - ) - } - if (params.concurrent !== undefined) { - lines.push( - `requests_coordinator_concurrent_count:avg_by_deployment{${labels}} ${params.concurrent}`, - ) - } - if (params.kvBlocks !== undefined) { - lines.push( - `generator_kv_blocks_fraction:avg_by_deployment{${labels}} ${params.kvBlocks}`, - ) - } - if (params.kvSlots !== undefined) { - lines.push( - `generator_kv_slots_fraction:avg_by_deployment{${labels}} ${params.kvSlots}`, - ) - } - for (const bucket of params.queueBuckets ?? []) { - lines.push( - `latency_generation_queue_ms_bucket:sum_by_deployment{${labels},le="${bucket.le}"} ${bucket.count}`, - ) - } - for (const bucket of params.ttftBuckets ?? []) { - lines.push( - `latency_to_first_token_ms_bucket:sum_by_deployment{${labels},le="${bucket.le}"} ${bucket.count}`, - ) - } - return lines.join('\n') -} - -describe('computeDeploymentHealth', () => { - test('healthy deployment with low error rate and low utilization', () => { - const metrics = parsePrometheusText( - fixture({ - requestRate: 10, - errorRate: 0, - concurrent: 3, - kvBlocks: 0.2, - kvSlots: 0.2, - queueBuckets: [ - { le: '100', count: 50 }, - { le: '1000', count: 100 }, - { le: '+Inf', count: 100 }, - ], - ttftBuckets: [ - { le: '500', count: 60 }, - { le: '2000', count: 100 }, - { le: '+Inf', count: 100 }, - ], - }), - ) - - const health = computeDeploymentHealth({ - deployment: DEPLOYMENT, - metrics, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - - expect(health.status).toBe('healthy') - expect(health.reasons).toEqual([]) - expect(health.deploymentId).toBe('d1') - expect(health.baseModel).toBe('m') - expect(health.metrics.errorFraction).toBe(0) - }) - - test('flags high error rate as unhealthy', () => { - const metrics = parsePrometheusText( - fixture({ requestRate: 10, errorRate: 2, kvBlocks: 0.1 }), - ) - const health = computeDeploymentHealth({ - deployment: DEPLOYMENT, - metrics, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - expect(health.status).toBe('unhealthy') - expect(health.metrics.errorFraction).toBeCloseTo(0.2, 5) - expect(health.reasons.some((r) => r.includes('error rate'))).toBe(true) - }) - - test('flags mid error rate as degraded', () => { - const metrics = parsePrometheusText( - fixture({ requestRate: 100, errorRate: 5, kvBlocks: 0.1 }), - ) - const health = computeDeploymentHealth({ - deployment: DEPLOYMENT, - metrics, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - expect(health.status).toBe('degraded') - expect(health.metrics.errorFraction).toBeCloseTo(0.05, 5) - }) - - test('flags saturated KV cache as unhealthy', () => { - const metrics = parsePrometheusText( - fixture({ requestRate: 10, errorRate: 0, kvBlocks: 0.995 }), - ) - const health = computeDeploymentHealth({ - deployment: DEPLOYMENT, - metrics, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - expect(health.status).toBe('unhealthy') - expect(health.reasons.some((r) => r.includes('KV blocks'))).toBe(true) - }) - - test('flags long queue wait as unhealthy', () => { - const metrics = parsePrometheusText( - fixture({ - requestRate: 10, - errorRate: 0, - kvBlocks: 0.3, - queueBuckets: [ - { le: '5000', count: 0 }, - { le: '20000', count: 100 }, - { le: '+Inf', count: 100 }, - ], - }), - ) - const health = computeDeploymentHealth({ - deployment: DEPLOYMENT, - metrics, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - expect(health.status).toBe('unhealthy') - expect(health.reasons.some((r) => r.includes('queue'))).toBe(true) - }) - - test('skips error-fraction check when request rate is below the floor', () => { - const metrics = parsePrometheusText( - fixture({ requestRate: 0.05, errorRate: 0.05, kvBlocks: 0.1 }), - ) - const health = computeDeploymentHealth({ - deployment: DEPLOYMENT, - metrics, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - expect(health.metrics.errorFraction).toBeCloseTo(1.0, 5) - expect(health.status).toBe('healthy') - expect(health.reasons.some((r) => r.includes('error rate'))).toBe(false) - }) - - test('still applies error-fraction check at or above the floor', () => { - const metrics = parsePrometheusText( - fixture({ requestRate: 0.1, errorRate: 0.05, kvBlocks: 0.1 }), - ) - const health = computeDeploymentHealth({ - deployment: DEPLOYMENT, - metrics, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - expect(health.status).toBe('unhealthy') - expect(health.reasons.some((r) => r.includes('error rate'))).toBe(true) - }) - - test('flags deployment with zero replicas as unhealthy', () => { - const metrics = parsePrometheusText( - fixture({ requestRate: 0, errorRate: 0, kvBlocks: 0, replicas: 0 }), - ) - const health = computeDeploymentHealth({ - deployment: DEPLOYMENT, - metrics, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - expect(health.status).toBe('unhealthy') - expect(health.metrics.replicas).toBe(0) - expect(health.reasons.some((r) => r.includes('replicas'))).toBe(true) - }) - - test('flags deployment with no replicas metric as unhealthy (cold / deleted)', () => { - const metrics = parsePrometheusText( - fixture({ requestRate: 0, errorRate: 0, kvBlocks: 0, replicas: null }), - ) - const health = computeDeploymentHealth({ - deployment: DEPLOYMENT, - metrics, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - expect(health.status).toBe('unhealthy') - expect(health.metrics.replicas).toBeNull() - expect(health.reasons.some((r) => r.includes('cold or deleted'))).toBe(true) - }) - - test('sums error counters across multiple HTTP codes', () => { - const labels = `base_model="m",deployment="${DEPLOYMENT}",deployment_id="d1"` - const text = [ - `deployment_replicas{deployment_account="test-acc",deployment_id="d1"} 1`, - `request_counter_total:sum_by_deployment{${labels}} 100`, - `requests_error_total:sum_by_deployment{${labels},http_code="500"} 3`, - `requests_error_total:sum_by_deployment{${labels},http_code="429"} 5`, - `generator_kv_blocks_fraction:avg_by_deployment{${labels}} 0.1`, - ].join('\n') - const metrics = parsePrometheusText(text) - const health = computeDeploymentHealth({ - deployment: DEPLOYMENT, - metrics, - thresholds: DEFAULT_HEALTH_THRESHOLDS, - }) - expect(health.metrics.errorRate).toBe(8) - expect(health.metrics.errorFraction).toBeCloseTo(0.08, 5) - expect(health.status).toBe('degraded') - }) -}) - -describe('computeSnapshot', () => { - test('marks deployments as unknown when metrics have never been fetched', () => { - const snap = computeSnapshot({ - metrics: null, - deployments: [DEPLOYMENT], - now: 1000, - }) - expect(snap.overall).toBe('unknown') - expect(snap.deployments[DEPLOYMENT].status).toBe('unknown') - expect(snap.scrapedAt).toBeNull() - }) - - test('downgrades stale snapshots to unhealthy', () => { - const metrics = parsePrometheusText( - fixture({ requestRate: 10, errorRate: 0, kvBlocks: 0.1 }), - 1000, - ) - const snap = computeSnapshot({ - metrics, - deployments: [DEPLOYMENT], - now: 1000 + DEFAULT_HEALTH_THRESHOLDS.staleSnapshotMs + 1, - }) - expect(snap.overall).toBe('unhealthy') - expect(snap.deployments[DEPLOYMENT].reasons[0]).toBe('snapshot stale') - }) - - test('overall status is the worst across deployments', () => { - const dep2 = 'accounts/test-acc/deployments/d2' - const text = [ - `deployment_replicas{deployment_id="d1"} 1`, - `request_counter_total:sum_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1"} 100`, - `requests_error_total:sum_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1",http_code="500"} 0`, - `generator_kv_blocks_fraction:avg_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1"} 0.1`, - `deployment_replicas{deployment_id="d2"} 1`, - `request_counter_total:sum_by_deployment{deployment="${dep2}",deployment_id="d2"} 100`, - `requests_error_total:sum_by_deployment{deployment="${dep2}",deployment_id="d2",http_code="500"} 30`, - `generator_kv_blocks_fraction:avg_by_deployment{deployment="${dep2}",deployment_id="d2"} 0.1`, - ].join('\n') - const metrics = parsePrometheusText(text, 1000) - const snap = computeSnapshot({ - metrics, - deployments: [DEPLOYMENT, dep2], - now: 1000, - }) - expect(snap.deployments[DEPLOYMENT].status).toBe('healthy') - expect(snap.deployments[dep2].status).toBe('unhealthy') - expect(snap.overall).toBe('unhealthy') - }) -}) diff --git a/web/src/server/fireworks-monitor/__tests__/monitor.test.ts b/web/src/server/fireworks-monitor/__tests__/monitor.test.ts deleted file mode 100644 index c437842384..0000000000 --- a/web/src/server/fireworks-monitor/__tests__/monitor.test.ts +++ /dev/null @@ -1,189 +0,0 @@ -import { afterEach, describe, expect, test } from 'bun:test' - -import { - __resetFireworksMonitorForTests, - getFireworksHealthSnapshot, - isFireworksAdmissible, - refreshFireworksHealthNow, - scrapeFireworksMetrics, - startFireworksMonitor, - stopFireworksMonitor, -} from '../monitor' - -afterEach(() => { - __resetFireworksMonitorForTests() -}) - -const DEPLOYMENT = 'accounts/test-acc/deployments/d1' - -const HEALTHY_BODY = [ - `deployment_replicas{deployment_id="d1"} 1`, - `request_counter_total:sum_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1"} 10`, - `requests_error_total:sum_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1",http_code="500"} 0`, - `generator_kv_blocks_fraction:avg_by_deployment{deployment="${DEPLOYMENT}",deployment_id="d1"} 0.1`, -].join('\n') - -function makeFetchMock( - responses: Array<{ status: number; body?: string; headers?: Record }>, -) { - const calls: Array<{ url: string; init?: RequestInit }> = [] - let i = 0 - const impl = (async (url: string, init?: RequestInit): Promise => { - calls.push({ url: String(url), init }) - const { status, body = '', headers = {} } = responses[Math.min(i, responses.length - 1)] - i++ - return new Response(body, { status, headers }) - }) as unknown as typeof globalThis.fetch - return { fetch: impl, calls: () => calls } -} - -describe('scrapeFireworksMetrics', () => { - test('sends Bearer auth + parses Prometheus response', async () => { - const { fetch, calls } = makeFetchMock([ - { status: 200, body: HEALTHY_BODY }, - ]) - - const metrics = await scrapeFireworksMetrics({ - apiKey: 'test-key', - accountId: 'acc-1', - fetch, - }) - - expect(metrics.samples.length).toBeGreaterThan(0) - const recorded = calls() - expect(recorded).toHaveLength(1) - expect(recorded[0].url).toBe('https://api.fireworks.ai/v1/accounts/acc-1/metrics') - const authHeader = (recorded[0].init?.headers as Record)?.Authorization - expect(authHeader).toBe('Bearer test-key') - }) - - test('throws FireworksScrapeError on 429 with retry-after seconds', async () => { - const { fetch } = makeFetchMock([ - { status: 429, body: 'slow down', headers: { 'retry-after': '45' } }, - ]) - - let caught: unknown = null - try { - await scrapeFireworksMetrics({ apiKey: 'k', accountId: 'acc', fetch }) - } catch (err) { - caught = err - } - expect(caught).toBeInstanceOf(Error) - const scrapeError = caught as Error & { status?: number; retryAfterMs?: number | null } - expect(scrapeError.status).toBe(429) - expect(scrapeError.retryAfterMs).toBe(45_000) - }) -}) - -describe('startFireworksMonitor', () => { - test('does not start when FIREWORKS_API_KEY is missing', () => { - const started = startFireworksMonitor({ apiKey: '' }) - expect(started).toBe(false) - }) - - test('first scrape populates the snapshot immediately', async () => { - const { fetch } = makeFetchMock([{ status: 200, body: HEALTHY_BODY }]) - - startFireworksMonitor({ - apiKey: 'test-key', - accountId: 'acc-1', - deployments: [DEPLOYMENT], - pollIntervalMs: 10 * 60_000, - fetch, - }) - - await refreshFireworksHealthNow() - - const snap = getFireworksHealthSnapshot() - expect(snap.overall).toBe('healthy') - expect(snap.scrapedAt).not.toBeNull() - expect(snap.deployments[DEPLOYMENT].status).toBe('healthy') - }) - - test('429 sets lastError and keeps snapshot unknown until a good scrape', async () => { - const { fetch } = makeFetchMock([ - { status: 429, body: 'rate limited', headers: { 'retry-after': '30' } }, - ]) - - startFireworksMonitor({ - apiKey: 'test-key', - accountId: 'acc-1', - deployments: [DEPLOYMENT], - pollIntervalMs: 10 * 60_000, - fetch, - }) - - await refreshFireworksHealthNow() - - const snap = getFireworksHealthSnapshot() - expect(snap.overall).toBe('unknown') - expect(snap.lastError).toMatch(/429/) - }) - - test('returns true and is idempotent on duplicate start', () => { - const { fetch } = makeFetchMock([{ status: 200, body: HEALTHY_BODY }]) - expect(startFireworksMonitor({ apiKey: 'k', fetch })).toBe(true) - expect(startFireworksMonitor({ apiKey: 'k', fetch })).toBe(true) - }) -}) - -describe('isFireworksAdmissible', () => { - test('returns false when monitor not started', () => { - expect(isFireworksAdmissible()).toBe(false) - }) - - test('returns true only when overall is healthy', async () => { - const { fetch } = makeFetchMock([{ status: 200, body: HEALTHY_BODY }]) - startFireworksMonitor({ - apiKey: 'k', - accountId: 'acc', - deployments: [DEPLOYMENT], - pollIntervalMs: 10 * 60_000, - fetch, - }) - await refreshFireworksHealthNow() - expect(isFireworksAdmissible()).toBe(true) - }) - - test('fails closed on unhealthy (stale) snapshot', async () => { - const { fetch } = makeFetchMock([ - { status: 200, body: HEALTHY_BODY }, - { status: 500, body: 'down' }, - ]) - startFireworksMonitor({ - apiKey: 'k', - accountId: 'acc', - deployments: [DEPLOYMENT], - pollIntervalMs: 10 * 60_000, - thresholds: { ...(await import('../compute-health')).DEFAULT_HEALTH_THRESHOLDS, staleSnapshotMs: 0 }, - fetch, - }) - await refreshFireworksHealthNow() // good scrape - - // Force stale by waiting one event-loop tick; staleSnapshotMs=0 makes it stale immediately. - await new Promise((r) => setTimeout(r, 1)) - expect(isFireworksAdmissible()).toBe(false) - }) - - test('can gate on a specific deployment id', async () => { - const { fetch } = makeFetchMock([{ status: 200, body: HEALTHY_BODY }]) - startFireworksMonitor({ - apiKey: 'k', - accountId: 'acc', - deployments: [DEPLOYMENT], - pollIntervalMs: 10 * 60_000, - fetch, - }) - await refreshFireworksHealthNow() - - expect(isFireworksAdmissible('d1')).toBe(true) - expect(isFireworksAdmissible('unknown-id')).toBe(false) - }) -}) - -describe('stopFireworksMonitor', () => { - test('is idempotent and safe to call when not started', () => { - stopFireworksMonitor() - stopFireworksMonitor() - }) -}) diff --git a/web/src/server/fireworks-monitor/__tests__/parse-prometheus.test.ts b/web/src/server/fireworks-monitor/__tests__/parse-prometheus.test.ts deleted file mode 100644 index 062b96427d..0000000000 --- a/web/src/server/fireworks-monitor/__tests__/parse-prometheus.test.ts +++ /dev/null @@ -1,116 +0,0 @@ -import { describe, expect, test } from 'bun:test' - -import { - estimateHistogramPercentile, - findSamples, - parsePrometheusText, -} from '../parse-prometheus' - -describe('parsePrometheusText', () => { - test('parses a sample with labels and a value', () => { - const text = [ - '# HELP request_counter_total:sum_by_deployment Request rate', - '# TYPE request_counter_total:sum_by_deployment gauge', - 'request_counter_total:sum_by_deployment{base_model="m",deployment="accounts/a/deployments/d1",deployment_account="a",deployment_id="d1"} 4.5', - ].join('\n') - - const parsed = parsePrometheusText(text, 1000) - - expect(parsed.scrapedAt).toBe(1000) - expect(parsed.samples).toHaveLength(1) - expect(parsed.samples[0]).toEqual({ - name: 'request_counter_total:sum_by_deployment', - labels: { - base_model: 'm', - deployment: 'accounts/a/deployments/d1', - deployment_account: 'a', - deployment_id: 'd1', - }, - value: 4.5, - }) - }) - - test('skips comments and blank lines', () => { - const text = [ - '# comment', - '', - 'foo 1', - '# another', - 'bar 2', - ].join('\n') - const parsed = parsePrometheusText(text) - expect(parsed.samples.map((s) => s.name)).toEqual(['foo', 'bar']) - }) - - test('parses special numeric values', () => { - const text = [ - 'm_nan NaN', - 'm_pinf +Inf', - 'm_ninf -Inf', - ].join('\n') - const parsed = parsePrometheusText(text) - expect(Number.isNaN(parsed.samples[0].value)).toBe(true) - expect(parsed.samples[1].value).toBe(Number.POSITIVE_INFINITY) - expect(parsed.samples[2].value).toBe(Number.NEGATIVE_INFINITY) - }) - - test('handles escaped quotes in labels', () => { - const text = 'm{path="a\\"b",name="x"} 1' - const parsed = parsePrometheusText(text) - expect(parsed.samples[0].labels).toEqual({ path: 'a"b', name: 'x' }) - }) - - test('ignores trailing timestamp on value', () => { - const text = 'm{a="1"} 42 1700000000000' - const parsed = parsePrometheusText(text) - expect(parsed.samples[0].value).toBe(42) - }) -}) - -describe('findSamples', () => { - test('filters by metric name and labels', () => { - const parsed = parsePrometheusText( - [ - 'm{deployment="d1"} 1', - 'm{deployment="d2"} 2', - 'other{deployment="d1"} 99', - ].join('\n'), - ) - const found = findSamples(parsed, 'm', { deployment: 'd1' }) - expect(found).toHaveLength(1) - expect(found[0].value).toBe(1) - }) -}) - -describe('estimateHistogramPercentile', () => { - test('returns le of first bucket that meets the percentile', () => { - const parsed = parsePrometheusText( - [ - 'h_bucket{le="10"} 10', - 'h_bucket{le="100"} 50', - 'h_bucket{le="1000"} 90', - 'h_bucket{le="+Inf"} 100', - ].join('\n'), - ) - const buckets = findSamples(parsed, 'h_bucket') - expect(estimateHistogramPercentile(buckets, 0.5)).toBe(100) - expect(estimateHistogramPercentile(buckets, 0.9)).toBe(1000) - expect(estimateHistogramPercentile(buckets, 0.1)).toBe(10) - }) - - test('returns null if total is zero', () => { - const parsed = parsePrometheusText( - [ - 'h_bucket{le="10"} 0', - 'h_bucket{le="+Inf"} 0', - ].join('\n'), - ) - expect( - estimateHistogramPercentile(findSamples(parsed, 'h_bucket'), 0.5), - ).toBeNull() - }) - - test('returns null when there are no buckets', () => { - expect(estimateHistogramPercentile([], 0.5)).toBeNull() - }) -}) diff --git a/web/src/server/fireworks-monitor/compute-health.ts b/web/src/server/fireworks-monitor/compute-health.ts deleted file mode 100644 index aa9ae53ba2..0000000000 --- a/web/src/server/fireworks-monitor/compute-health.ts +++ /dev/null @@ -1,294 +0,0 @@ -import { - avgSamples, - estimateHistogramPercentile, - findSamples, - sumSamples, -} from './parse-prometheus' - -import type { - DeploymentHealth, - DeploymentHealthStatus, - FireworksHealthSnapshot, - PromMetrics, - PromSample, -} from './types' - -export interface HealthThresholds { - /** If no successful scrape for this long, overall status is unhealthy. */ - staleSnapshotMs: number - /** Minimum request rate (req/s) before applying the error-fraction check. Below - * this, a handful of transient errors on a near-idle deployment would flap the - * status unnecessarily. */ - minRequestRateForErrorCheck: number - /** Fraction of requests erroring: above this → degraded. */ - errorFractionDegraded: number - /** Fraction of requests erroring: above this → unhealthy. */ - errorFractionUnhealthy: number - /** KV blocks fraction above this → degraded (queue contention imminent). */ - kvBlocksFractionDegraded: number - /** KV blocks fraction above this → unhealthy (cache thrashing). */ - kvBlocksFractionUnhealthy: number - /** p50 time spent in generation queue above this (ms) → degraded. */ - generationQueueMsDegraded: number - /** p50 time spent in generation queue above this (ms) → unhealthy. */ - generationQueueMsUnhealthy: number - /** p50 TTFT above this (ms) → degraded. */ - ttftMsDegraded: number - /** p50 TTFT above this (ms) → unhealthy. */ - ttftMsUnhealthy: number -} - -// Tuned to trip 'degraded' before users feel it on glm-5.1. Override per-instance -// via startFireworksMonitor({ thresholds }). -export const DEFAULT_HEALTH_THRESHOLDS: HealthThresholds = { - staleSnapshotMs: 3 * 60 * 1000, - minRequestRateForErrorCheck: 0.1, - errorFractionDegraded: 0.02, - errorFractionUnhealthy: 0.1, - kvBlocksFractionDegraded: 0.85, - kvBlocksFractionUnhealthy: 0.97, - generationQueueMsDegraded: 300, - generationQueueMsUnhealthy: 2_000, - ttftMsDegraded: 1_500, - ttftMsUnhealthy: 10_000, -} - -const STATUS_RANK: Record = { - healthy: 0, - degraded: 1, - unhealthy: 2, - unknown: 3, -} - -export function computeDeploymentHealth(params: { - deployment: string - metrics: PromMetrics - thresholds: HealthThresholds -}): DeploymentHealth { - const { deployment, metrics, thresholds } = params - const filter = { deployment } - const deploymentId = parseDeploymentId(deployment) - - // `deployment_replicas` is keyed by deployment_id (not the full deployment - // path). Zero or missing replicas means the deployment is cold / scaled to - // zero / deleted — admission must fail closed in that case. - const replicasSamples = findSamples(metrics, 'deployment_replicas', { - deployment_id: deploymentId, - }) - const replicas = replicasSamples.length > 0 ? sumSamples(replicasSamples) : null - - const requestRateSamples = findSamples( - metrics, - 'request_counter_total:sum_by_deployment', - filter, - ) - const errorRateSamples = findSamples( - metrics, - 'requests_error_total:sum_by_deployment', - filter, - ) - - const requestRate = sumSamples(requestRateSamples) - const errorRate = sumSamples(errorRateSamples) - const errorFraction = requestRate > 0 ? errorRate / requestRate : 0 - - const concurrentRequests = - avgSamples( - findSamples( - metrics, - 'requests_coordinator_concurrent_count:avg_by_deployment', - filter, - ), - ) ?? 0 - - const kvBlocksFraction = - avgSamples( - findSamples(metrics, 'generator_kv_blocks_fraction:avg_by_deployment', filter), - ) ?? 0 - const kvSlotsFraction = - avgSamples( - findSamples(metrics, 'generator_kv_slots_fraction:avg_by_deployment', filter), - ) ?? 0 - - const p50GenerationQueueMs = percentileForDeployment( - metrics, - 'latency_generation_queue_ms_bucket:sum_by_deployment', - deployment, - 0.5, - ) - const p50TimeToFirstTokenMs = percentileForDeployment( - metrics, - 'latency_to_first_token_ms_bucket:sum_by_deployment', - deployment, - 0.5, - ) - - const baseModelSample = [ - ...requestRateSamples, - ...errorRateSamples, - ].find((s) => s.labels.base_model) - const baseModel = baseModelSample?.labels.base_model ?? null - - const reasons: string[] = [] - let status: DeploymentHealthStatus = 'healthy' - - const upgrade = (next: DeploymentHealthStatus) => { - if (STATUS_RANK[next] > STATUS_RANK[status]) status = next - } - - // A deployment with no running replicas cannot serve traffic. Treat as - // unhealthy unconditionally so admission stops funneling users to a cold - // backend. Missing gauge (`replicas === null`) is the strongest signal - // Fireworks has dropped the deployment from its scrape entirely. - if (replicas === null) { - reasons.push('no replicas metric — deployment cold or deleted') - upgrade('unhealthy') - } else if (replicas <= 0) { - reasons.push(`replicas=${replicas}`) - upgrade('unhealthy') - } - - if (requestRate >= thresholds.minRequestRateForErrorCheck) { - if (errorFraction >= thresholds.errorFractionUnhealthy) { - reasons.push(`error rate ${(errorFraction * 100).toFixed(1)}% ≥ ${(thresholds.errorFractionUnhealthy * 100).toFixed(1)}%`) - upgrade('unhealthy') - } else if (errorFraction >= thresholds.errorFractionDegraded) { - reasons.push(`error rate ${(errorFraction * 100).toFixed(1)}% ≥ ${(thresholds.errorFractionDegraded * 100).toFixed(1)}%`) - upgrade('degraded') - } - } - - if (kvBlocksFraction >= thresholds.kvBlocksFractionUnhealthy) { - reasons.push(`KV blocks ${(kvBlocksFraction * 100).toFixed(0)}% ≥ ${(thresholds.kvBlocksFractionUnhealthy * 100).toFixed(0)}%`) - upgrade('unhealthy') - } else if (kvBlocksFraction >= thresholds.kvBlocksFractionDegraded) { - reasons.push(`KV blocks ${(kvBlocksFraction * 100).toFixed(0)}% ≥ ${(thresholds.kvBlocksFractionDegraded * 100).toFixed(0)}%`) - upgrade('degraded') - } - - if (p50GenerationQueueMs !== null) { - if (p50GenerationQueueMs >= thresholds.generationQueueMsUnhealthy) { - reasons.push(`p50 queue ${Math.round(p50GenerationQueueMs)}ms ≥ ${thresholds.generationQueueMsUnhealthy}ms`) - upgrade('unhealthy') - } else if (p50GenerationQueueMs >= thresholds.generationQueueMsDegraded) { - reasons.push(`p50 queue ${Math.round(p50GenerationQueueMs)}ms ≥ ${thresholds.generationQueueMsDegraded}ms`) - upgrade('degraded') - } - } - - if (p50TimeToFirstTokenMs !== null) { - if (p50TimeToFirstTokenMs >= thresholds.ttftMsUnhealthy) { - reasons.push(`p50 TTFT ${Math.round(p50TimeToFirstTokenMs)}ms ≥ ${thresholds.ttftMsUnhealthy}ms`) - upgrade('unhealthy') - } else if (p50TimeToFirstTokenMs >= thresholds.ttftMsDegraded) { - reasons.push(`p50 TTFT ${Math.round(p50TimeToFirstTokenMs)}ms ≥ ${thresholds.ttftMsDegraded}ms`) - upgrade('degraded') - } - } - - return { - deploymentId, - deployment, - baseModel, - status, - reasons, - metrics: { - replicas, - requestRate, - errorRate, - errorFraction, - concurrentRequests, - kvBlocksFraction, - kvSlotsFraction, - p50GenerationQueueMs, - p50TimeToFirstTokenMs, - }, - } -} - -function percentileForDeployment( - metrics: PromMetrics, - metricName: string, - deployment: string, - percentile: number, -): number | null { - const buckets: PromSample[] = findSamples(metrics, metricName, { deployment }) - return estimateHistogramPercentile(buckets, percentile) -} - -function parseDeploymentId(deployment: string): string { - const parts = deployment.split('/') - return parts[parts.length - 1] ?? deployment -} - -export function computeSnapshot(params: { - metrics: PromMetrics | null - deployments: string[] - thresholds?: HealthThresholds - now?: number - lastError?: string | null -}): FireworksHealthSnapshot { - const thresholds = params.thresholds ?? DEFAULT_HEALTH_THRESHOLDS - const now = params.now ?? Date.now() - const lastError = params.lastError ?? null - - if (!params.metrics) { - const unknownDeployments: Record = {} - for (const deployment of params.deployments) { - unknownDeployments[deployment] = { - deploymentId: parseDeploymentId(deployment), - deployment, - baseModel: null, - status: 'unknown', - reasons: ['no scrape yet'], - metrics: { - replicas: null, - requestRate: 0, - errorRate: 0, - errorFraction: 0, - concurrentRequests: 0, - kvBlocksFraction: 0, - kvSlotsFraction: 0, - p50GenerationQueueMs: null, - p50TimeToFirstTokenMs: null, - }, - } - } - return { - scrapedAt: null, - ageMs: null, - overall: 'unknown', - deployments: unknownDeployments, - lastError, - } - } - - const deployments: Record = {} - let worst: DeploymentHealthStatus = 'healthy' - - const stale = now - params.metrics.scrapedAt > thresholds.staleSnapshotMs - - for (const deployment of params.deployments) { - const health = computeDeploymentHealth({ - deployment, - metrics: params.metrics, - thresholds, - }) - if (stale) { - health.reasons.unshift('snapshot stale') - if (STATUS_RANK['unhealthy'] > STATUS_RANK[health.status]) { - health.status = 'unhealthy' - } - } - deployments[deployment] = health - if (STATUS_RANK[health.status] > STATUS_RANK[worst]) worst = health.status - } - - return { - scrapedAt: params.metrics.scrapedAt, - ageMs: now - params.metrics.scrapedAt, - overall: worst, - deployments, - lastError, - } -} diff --git a/web/src/server/fireworks-monitor/monitor.ts b/web/src/server/fireworks-monitor/monitor.ts deleted file mode 100644 index 501e90d3bd..0000000000 --- a/web/src/server/fireworks-monitor/monitor.ts +++ /dev/null @@ -1,316 +0,0 @@ -import { env } from '@codebuff/internal/env' - -import { computeSnapshot, DEFAULT_HEALTH_THRESHOLDS } from './compute-health' -import { parsePrometheusText } from './parse-prometheus' - -import { FIREWORKS_ACCOUNT_ID, FIREWORKS_DEPLOYMENT_MAP } from '@/llm-api/fireworks-config' -import { logger } from '@/util/logger' - -import type { HealthThresholds } from './compute-health' -import type { FireworksHealthSnapshot, PromMetrics } from './types' - -const FIREWORKS_METRICS_URL = (accountId: string) => - `https://api.fireworks.ai/v1/accounts/${accountId}/metrics` - -const DEFAULT_POLL_INTERVAL_MS = 60_000 -/** Random ± jitter so multiple pods don't line up and collectively exceed - * the Fireworks 6 req/min/account rate limit. */ -const POLL_JITTER_MS = 10_000 -const FETCH_TIMEOUT_MS = 15_000 -/** Cap Retry-After honored on 429 so a bad header cannot stall the monitor - * indefinitely. */ -const MAX_BACKOFF_MS = 5 * 60 * 1000 -/** Fallback backoff if Fireworks returns 429 without a parseable Retry-After. */ -const DEFAULT_429_BACKOFF_MS = 60_000 - -export interface MonitorOptions { - apiKey: string - accountId: string - deployments: string[] - pollIntervalMs?: number - thresholds?: HealthThresholds - fetch?: typeof globalThis.fetch -} - -interface MonitorState { - options: MonitorOptions - metrics: PromMetrics | null - lastError: string | null - /** Earliest time at which the next scrape may fire (honors Retry-After). */ - backoffUntil: number - timer: ReturnType | null - inFlight: Promise | null - /** True once stopFireworksMonitor has been called — suppresses in-flight reschedules. */ - stopped: boolean -} - -let state: MonitorState | null = null - -class FireworksScrapeError extends Error { - constructor( - public readonly status: number, - public readonly statusText: string, - public readonly retryAfterMs: number | null, - bodyPreview: string, - ) { - super(`Fireworks metrics scrape failed: ${status} ${statusText}${bodyPreview ? ` — ${bodyPreview}` : ''}`) - this.name = 'FireworksScrapeError' - } -} - -export async function scrapeFireworksMetrics(params: { - apiKey: string - accountId: string - fetch?: typeof globalThis.fetch - signal?: AbortSignal - now?: number -}): Promise { - const fetchImpl = params.fetch ?? globalThis.fetch - const response = await fetchImpl(FIREWORKS_METRICS_URL(params.accountId), { - method: 'GET', - headers: { - Authorization: `Bearer ${params.apiKey}`, - }, - signal: params.signal, - }) - - if (!response.ok) { - const body = await response.text().catch(() => '') - const retryAfterMs = parseRetryAfter(response.headers.get('retry-after')) - throw new FireworksScrapeError( - response.status, - response.statusText, - retryAfterMs, - body.slice(0, 200), - ) - } - - const text = await response.text() - return parsePrometheusText(text, params.now ?? Date.now()) -} - -function parseRetryAfter(raw: string | null): number | null { - if (!raw) return null - const seconds = Number(raw) - if (Number.isFinite(seconds) && seconds >= 0) { - return Math.min(seconds * 1000, MAX_BACKOFF_MS) - } - const dateMs = Date.parse(raw) - if (!Number.isNaN(dateMs)) { - const delta = dateMs - Date.now() - return Math.min(Math.max(delta, 0), MAX_BACKOFF_MS) - } - return null -} - -function jittered(intervalMs: number): number { - const delta = (Math.random() * 2 - 1) * POLL_JITTER_MS - return Math.max(1_000, Math.round(intervalMs + delta)) -} - -/** Unwrap nested `.cause` chains (undici's `fetch failed` wraps the real - * error — DNS, ECONNREFUSED, TLS, etc. — under `.cause`). */ -function describeError(error: unknown): { - message: string - name?: string - code?: string - causes: Array<{ name?: string; message: string; code?: string }> - stack?: string -} { - const causes: Array<{ name?: string; message: string; code?: string }> = [] - let cursor: unknown = error instanceof Error ? (error as any).cause : undefined - let guard = 0 - while (cursor && guard < 5) { - if (cursor instanceof Error) { - causes.push({ - name: cursor.name, - message: cursor.message, - code: (cursor as any).code, - }) - cursor = (cursor as any).cause - } else { - causes.push({ message: String(cursor) }) - break - } - guard++ - } - return { - message: error instanceof Error ? error.message : String(error), - name: error instanceof Error ? error.name : undefined, - code: error instanceof Error ? (error as any).code : undefined, - causes, - stack: error instanceof Error ? error.stack : undefined, - } -} - -async function pollOnce(): Promise { - if (!state) return - const controller = new AbortController() - const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS) - const url = FIREWORKS_METRICS_URL(state.options.accountId) - try { - const metrics = await scrapeFireworksMetrics({ - apiKey: state.options.apiKey, - accountId: state.options.accountId, - fetch: state.options.fetch, - signal: controller.signal, - }) - state.metrics = metrics - state.lastError = null - state.backoffUntil = 0 - } catch (error) { - const details = describeError(error) - state.lastError = details.message - if (error instanceof FireworksScrapeError && error.status === 429) { - const backoffMs = error.retryAfterMs ?? DEFAULT_429_BACKOFF_MS - state.backoffUntil = Date.now() + backoffMs - logger.warn( - { status: 429, backoffMs }, - '[FireworksMonitor] Rate limited, backing off', - ) - } else { - logger.warn( - { - error: details.message, - errorName: details.name, - errorCode: details.code, - causes: details.causes, - aborted: controller.signal.aborted, - url, - accountId: state.options.accountId, - usingCustomFetch: Boolean(state.options.fetch), - stack: details.stack, - }, - '[FireworksMonitor] Scrape failed', - ) - } - } finally { - clearTimeout(timeout) - } -} - -function scheduleNext() { - if (!state || state.stopped) return - const intervalMs = state.options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS - const base = jittered(intervalMs) - const untilBackoff = Math.max(0, state.backoffUntil - Date.now()) - const delayMs = Math.max(base, untilBackoff) - const timer = setTimeout(runTick, delayMs) - if (typeof timer.unref === 'function') timer.unref() - state.timer = timer -} - -function runTick() { - if (!state || state.stopped || state.inFlight) { - scheduleNext() - return - } - state.inFlight = pollOnce().finally(() => { - if (!state) return - state.inFlight = null - scheduleNext() - }) -} - -export function startFireworksMonitor(options: Partial = {}): boolean { - if (state) return true - - const apiKey = options.apiKey ?? env.FIREWORKS_API_KEY - if (!apiKey) { - logger.warn({}, '[FireworksMonitor] FIREWORKS_API_KEY not set — monitor not started') - return false - } - - const accountId = options.accountId ?? FIREWORKS_ACCOUNT_ID - const deployments = - options.deployments ?? Object.values(FIREWORKS_DEPLOYMENT_MAP) - const pollIntervalMs = options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS - const thresholds = options.thresholds ?? DEFAULT_HEALTH_THRESHOLDS - - state = { - options: { - apiKey, - accountId, - deployments, - pollIntervalMs, - thresholds, - fetch: options.fetch, - }, - metrics: null, - lastError: null, - backoffUntil: 0, - timer: null, - inFlight: null, - stopped: false, - } - - // First scrape runs immediately; subsequent scrapes are self-scheduled via - // scheduleNext() with jitter so N pods don't synchronise. - runTick() - - logger.info( - { - accountId, - deployments, - pollIntervalMs, - }, - '[FireworksMonitor] Started', - ) - return true -} - -export function stopFireworksMonitor(): void { - if (!state) return - state.stopped = true - if (state.timer) clearTimeout(state.timer) - state = null -} - -export function getFireworksHealthSnapshot(now: number = Date.now()): FireworksHealthSnapshot { - if (!state) { - return { - scrapedAt: null, - ageMs: null, - overall: 'unknown', - deployments: {}, - lastError: 'monitor not started', - } - } - return computeSnapshot({ - metrics: state.metrics, - deployments: state.options.deployments, - thresholds: state.options.thresholds, - now, - lastError: state.lastError, - }) -} - -/** - * Gate free-session admission: ONLY returns true when the latest snapshot is - * 'healthy'. Any other status — 'degraded', 'unhealthy', 'unknown' — fails - * closed so the waiting room catches requests during incidents, cold starts, - * or monitor failures. - * - * Pass `deploymentId` to gate on a specific deployment instead of the overall - * worst-case. - */ -export function isFireworksAdmissible(deploymentId?: string): boolean { - const snapshot = getFireworksHealthSnapshot() - if (deploymentId) { - const match = Object.values(snapshot.deployments).find( - (d) => d.deploymentId === deploymentId || d.deployment === deploymentId, - ) - return match?.status === 'healthy' - } - return snapshot.overall === 'healthy' -} - -/** Force an immediate scrape (for tests / admin endpoints). Resolves when done. */ -export async function refreshFireworksHealthNow(): Promise { - if (!state) return - await pollOnce() -} - -export function __resetFireworksMonitorForTests(): void { - stopFireworksMonitor() -} diff --git a/web/src/server/fireworks-monitor/parse-prometheus.ts b/web/src/server/fireworks-monitor/parse-prometheus.ts deleted file mode 100644 index 1518fa4e41..0000000000 --- a/web/src/server/fireworks-monitor/parse-prometheus.ts +++ /dev/null @@ -1,147 +0,0 @@ -import type { PromMetrics, PromSample } from './types' - -const LINE_RE = /^([a-zA-Z_:][a-zA-Z0-9_:]*)(\{([^}]*)\})?\s+(.+)$/ - -export function parsePrometheusText(text: string, now: number = Date.now()): PromMetrics { - const samples: PromSample[] = [] - - for (const rawLine of text.split('\n')) { - const line = rawLine.trim() - if (line === '' || line.startsWith('#')) continue - - const match = LINE_RE.exec(line) - if (!match) continue - - const name = match[1] - const labelBlob = match[3] ?? '' - const valueStr = match[4].trim() - - const value = parsePromValue(valueStr) - if (value === null) continue - - samples.push({ - name, - labels: parseLabels(labelBlob), - value, - }) - } - - return { samples, scrapedAt: now } -} - -function parsePromValue(raw: string): number | null { - const trimmed = raw.split(/\s+/)[0] - if (trimmed === 'NaN') return NaN - if (trimmed === '+Inf') return Number.POSITIVE_INFINITY - if (trimmed === '-Inf') return Number.NEGATIVE_INFINITY - const n = Number(trimmed) - return Number.isFinite(n) || Number.isNaN(n) ? n : null -} - -function parseLabels(blob: string): Record { - const labels: Record = {} - if (blob === '') return labels - - let i = 0 - while (i < blob.length) { - while (i < blob.length && (blob[i] === ' ' || blob[i] === ',')) i++ - if (i >= blob.length) break - - const eq = blob.indexOf('=', i) - if (eq === -1) break - const key = blob.slice(i, eq).trim() - - let j = eq + 1 - if (blob[j] !== '"') break - j++ - let value = '' - while (j < blob.length && blob[j] !== '"') { - if (blob[j] === '\\' && j + 1 < blob.length) { - const next = blob[j + 1] - value += next === 'n' ? '\n' : next === 't' ? '\t' : next - j += 2 - } else { - value += blob[j] - j++ - } - } - labels[key] = value - i = j + 1 - } - - return labels -} - -export function findSamples( - metrics: PromMetrics, - name: string, - labelFilter: Record = {}, -): PromSample[] { - return metrics.samples.filter((s) => { - if (s.name !== name) return false - for (const [k, v] of Object.entries(labelFilter)) { - if (s.labels[k] !== v) return false - } - return true - }) -} - -export function sumSamples(samples: PromSample[]): number { - let sum = 0 - for (const s of samples) { - if (Number.isFinite(s.value)) sum += s.value - } - return sum -} - -export function avgSamples(samples: PromSample[]): number | null { - if (samples.length === 0) return null - const finite = samples.filter((s) => Number.isFinite(s.value)) - if (finite.length === 0) return null - return sumSamples(finite) / finite.length -} - -export function estimateHistogramPercentile( - buckets: PromSample[], - percentile: number, -): number | null { - if (buckets.length === 0) return null - - const sorted = [...buckets] - .map((b) => { - const leRaw = b.labels.le - const le = leRaw === '+Inf' ? Number.POSITIVE_INFINITY : Number(leRaw) - return { le, count: b.value } - }) - .filter((b) => !Number.isNaN(b.le)) - .sort((a, b) => a.le - b.le) - - if (sorted.length === 0) return null - const total = sorted[sorted.length - 1].count - if (!Number.isFinite(total) || total <= 0) return null - - const target = total * percentile - for (let idx = 0; idx < sorted.length; idx++) { - if (sorted[idx].count >= target) { - if (sorted[idx].le === Number.POSITIVE_INFINITY) { - return idx > 0 ? sorted[idx - 1].le : null - } - return sorted[idx].le - } - } - return null -} - -export function groupBucketsByLabels( - samples: PromSample[], - groupKeys: string[], -): Map { - const groups = new Map() - for (const s of samples) { - const key = groupKeys.map((k) => `${k}=${s.labels[k] ?? ''}`).join('|') - const arr = groups.get(key) ?? [] - arr.push(s) - groups.set(key, arr) - } - return groups -} diff --git a/web/src/server/fireworks-monitor/types.ts b/web/src/server/fireworks-monitor/types.ts deleted file mode 100644 index cc10a610ea..0000000000 --- a/web/src/server/fireworks-monitor/types.ts +++ /dev/null @@ -1,41 +0,0 @@ -export interface PromSample { - name: string - labels: Record - value: number -} - -export interface PromMetrics { - samples: PromSample[] - scrapedAt: number -} - -export type DeploymentHealthStatus = 'healthy' | 'degraded' | 'unhealthy' | 'unknown' - -export interface DeploymentHealth { - deploymentId: string - deployment: string - baseModel: string | null - status: DeploymentHealthStatus - reasons: string[] - metrics: { - /** null when Fireworks doesn't emit a deployment_replicas gauge for the - * deployment (cold / deleted / not-yet-scraped). 0 means scaled-to-zero. */ - replicas: number | null - requestRate: number - errorRate: number - errorFraction: number - concurrentRequests: number - kvBlocksFraction: number - kvSlotsFraction: number - p50GenerationQueueMs: number | null - p50TimeToFirstTokenMs: number | null - } -} - -export interface FireworksHealthSnapshot { - scrapedAt: number | null - ageMs: number | null - overall: DeploymentHealthStatus - deployments: Record - lastError: string | null -} diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts index 2e72d2351e..60a5b92907 100644 --- a/web/src/server/free-session/__tests__/admission.test.ts +++ b/web/src/server/free-session/__tests__/admission.test.ts @@ -19,7 +19,7 @@ function makeAdmissionDeps(overrides: Partial = {}): AdmissionDep calls.admit.push(limit) return Array.from({ length: limit }, (_, i) => ({ user_id: `u${i}` })) }, - isFireworksAdmissible: () => true, + isFireworksAdmissible: async () => true, getMaxAdmitsPerTick: () => 1, getSessionLengthMs: () => 60 * 60 * 1000, getSessionGraceMs: () => 30 * 60 * 1000, @@ -44,7 +44,7 @@ describe('runAdmissionTick', () => { test('skips admission when Fireworks not healthy', async () => { const deps = makeAdmissionDeps({ - isFireworksAdmissible: () => false, + isFireworksAdmissible: async () => false, }) const result = await runAdmissionTick(deps) expect(result.admitted).toBe(0) @@ -58,7 +58,7 @@ describe('runAdmissionTick', () => { swept = 3 return 3 }, - isFireworksAdmissible: () => false, + isFireworksAdmissible: async () => false, }) const result = await runAdmissionTick(deps) expect(swept).toBe(3) diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts index 6868903c38..3c73518452 100644 --- a/web/src/server/free-session/admission.ts +++ b/web/src/server/free-session/admission.ts @@ -1,3 +1,5 @@ +import { env } from '@codebuff/internal/env' + import { ADMISSION_TICK_MS, MAX_ADMITS_PER_TICK, @@ -7,7 +9,7 @@ import { } from './config' import { admitFromQueue, countActive, queueDepth, sweepExpired } from './store' -import { isFireworksAdmissible } from '@/server/fireworks-monitor/monitor' +import { FIREWORKS_ACCOUNT_ID } from '@/llm-api/fireworks-config' import { logger } from '@/util/logger' interface AdmissionState { @@ -23,6 +25,30 @@ let state: AdmissionState | null = null * queue depth and active count. At ADMISSION_TICK_MS=15s, 10 ticks = 2.5 min. */ const SNAPSHOT_EVERY_N_TICKS = 10 +const FIREWORKS_METRICS_URL = `https://api.fireworks.ai/v1/accounts/${FIREWORKS_ACCOUNT_ID}/metrics` +const HEALTH_CHECK_TIMEOUT_MS = 5_000 + +/** Fails closed on DNS failure, non-OK status, or timeout — so admission halts + * whenever the upstream is unreachable and resumes on its own when it recovers. */ +export async function isFireworksAdmissible(): Promise { + const apiKey = env.FIREWORKS_API_KEY + if (!apiKey) return false + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), HEALTH_CHECK_TIMEOUT_MS) + try { + const response = await fetch(FIREWORKS_METRICS_URL, { + method: 'GET', + headers: { Authorization: `Bearer ${apiKey}` }, + signal: controller.signal, + }) + return response.ok + } catch { + return false + } finally { + clearTimeout(timeout) + } +} + export interface AdmissionDeps { sweepExpired: (now: Date, graceMs: number) => Promise countActive: (now: Date) => Promise @@ -32,7 +58,7 @@ export interface AdmissionDeps { sessionLengthMs: number now: Date }) => Promise<{ user_id: string }[]> - isFireworksAdmissible: () => boolean + isFireworksAdmissible: () => Promise getMaxAdmitsPerTick: () => number getSessionLengthMs: () => number getSessionGraceMs: () => number @@ -44,7 +70,12 @@ const defaultDeps: AdmissionDeps = { countActive, queueDepth, admitFromQueue, - isFireworksAdmissible, + // FREEBUFF_DEV_FORCE_ADMIT lets local `dev:freebuff` drive the full + // waiting-room → admitted → draining → ended flow without a real upstream. + isFireworksAdmissible: + process.env.FREEBUFF_DEV_FORCE_ADMIT === 'true' + ? async () => true + : isFireworksAdmissible, getMaxAdmitsPerTick: () => MAX_ADMITS_PER_TICK, getSessionLengthMs, getSessionGraceMs, @@ -61,12 +92,12 @@ export interface AdmissionTickResult { /** * Run a single admission tick: * 1. Expire sessions past their expires_at. - * 2. If Fireworks is not 'healthy', skip admission (waiting queue grows). + * 2. If Fireworks is not reachable, skip admission (waiting queue grows). * 3. Admit up to maxAdmitsPerTick queued users. * - * There is no global concurrency cap — the Fireworks health monitor is the + * There is no global concurrency cap — the Fireworks health probe is the * primary gate. Admission drips at (maxAdmitsPerTick / ADMISSION_TICK_MS), - * which drives utilization up slowly; once metrics degrade, step 2 halts + * which drives utilization up slowly; once the probe fails, step 2 halts * admission until things recover. * * Returns counts for observability. Safe to call concurrently across pods — @@ -78,7 +109,7 @@ export async function runAdmissionTick( const now = (deps.now ?? (() => new Date()))() const expired = await deps.sweepExpired(now, deps.getSessionGraceMs()) - if (!deps.isFireworksAdmissible()) { + if (!(await deps.isFireworksAdmissible())) { const [active, depth] = await Promise.all([ deps.countActive(now), deps.queueDepth(), From 5ca04d3b3be5f56e7e5581aaff737c7b15b4ba9d Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 18:05:50 -0700 Subject: [PATCH 24/31] Show upgrade-required error to old freebuff clients When the waiting-room gate rejects a free-mode request and no freebuff_instance_id was sent, return 426 with a "please restart to upgrade" message. Old CLI versions render the message verbatim in their error banner; new clients still get the normal gate responses. Co-Authored-By: Claude Opus 4.7 (1M context) --- web/src/app/api/v1/chat/completions/_post.ts | 23 +++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 06258039e7..b2f420882a 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -410,11 +410,32 @@ export async function postChatCompletions(params: { // Runs before the rate limiter so rejected requests don't burn a queued // user's free-mode counters. if (isFreeModeRequest) { + const claimedInstanceId = + typedBody.codebuff_metadata?.freebuff_instance_id const gate = await checkSession({ userId, - claimedInstanceId: typedBody.codebuff_metadata?.freebuff_instance_id, + claimedInstanceId, }) if (!gate.ok) { + // Old freebuff clients (pre-waiting-room) never send an instance_id. + // Return a 426 with a clear "please restart to upgrade" message that + // their existing error banner will render verbatim. + if (!claimedInstanceId) { + trackEvent({ + event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR, + userId, + properties: { error: 'freebuff_update_required' }, + logger, + }) + return NextResponse.json( + { + error: 'freebuff_update_required', + message: + 'This version of freebuff is out of date. Please restart freebuff to upgrade and continue using free mode.', + }, + { status: 426 }, + ) + } trackEvent({ event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR, userId, From f99d28f1308d8b1b493eebc6963e91e235a8f0bd Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 18:14:08 -0700 Subject: [PATCH 25/31] Simplify freebuff waiting-room implementation Collapse client-side draining/ended into a single ended state, move freebuff session state into zustand (replacing the module-level handle singleton), host the Fireworks probe inside admitFromQueue, and share the wire types between server and CLI. Drops ~150 lines net. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/src/app.tsx | 6 +- cli/src/chat.tsx | 4 +- .../components/freebuff-superseded-screen.tsx | 15 +- cli/src/components/session-ended-banner.tsx | 18 +- cli/src/components/waiting-room-screen.tsx | 90 +++----- cli/src/hooks/helpers/send-message.ts | 9 +- cli/src/hooks/use-freebuff-session.ts | 214 +++++++++--------- cli/src/state/freebuff-session-store.ts | 42 ++++ cli/src/types/freebuff-session.ts | 68 ++---- cli/src/utils/freebuff-exit.ts | 21 ++ common/src/types/freebuff-session.ts | 47 ++++ docs/freebuff-waiting-room.md | 7 +- .../free-session/__tests__/admission.test.ts | 21 +- web/src/server/free-session/admission.ts | 98 +++----- web/src/server/free-session/public-api.ts | 14 +- web/src/server/free-session/store.ts | 50 ++-- web/src/server/free-session/types.ts | 40 +--- 17 files changed, 359 insertions(+), 405 deletions(-) create mode 100644 cli/src/state/freebuff-session-store.ts create mode 100644 cli/src/utils/freebuff-exit.ts create mode 100644 common/src/types/freebuff-session.ts diff --git a/cli/src/app.tsx b/cli/src/app.tsx index ae0cd8ea5a..5c93cd8f6f 100644 --- a/cli/src/app.tsx +++ b/cli/src/app.tsx @@ -376,9 +376,9 @@ const AuthedSurface = ({ // Falling through to on 'none' would leave the user unable to send // any free-mode request until the next poll cycle. // - // 'draining' and 'ended' deliberately fall through to : the agent - // may still be finishing work under the server-side grace period, and the - // chat surface itself swaps the input box for the session-ended banner. + // 'ended' deliberately falls through to : the agent may still be + // finishing work under the server-side grace period, and the chat surface + // itself swaps the input box for the session-ended banner. if ( IS_FREEBUFF && (session === null || diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx index 845af09f75..1e136654bd 100644 --- a/cli/src/chat.tsx +++ b/cli/src/chat.tsx @@ -1344,9 +1344,7 @@ export const Chat = ({ const hasActiveFreebuffSession = IS_FREEBUFF && freebuffSession?.status === 'active' const isFreebuffSessionOver = - IS_FREEBUFF && - (freebuffSession?.status === 'draining' || - freebuffSession?.status === 'ended') + IS_FREEBUFF && freebuffSession?.status === 'ended' const shouldShowStatusLine = !feedbackMode && (hasStatusIndicatorContent || diff --git a/cli/src/components/freebuff-superseded-screen.tsx b/cli/src/components/freebuff-superseded-screen.tsx index 8d027c8978..a59ae3e144 100644 --- a/cli/src/components/freebuff-superseded-screen.tsx +++ b/cli/src/components/freebuff-superseded-screen.tsx @@ -5,15 +5,11 @@ import React, { useCallback } from 'react' import { useLogo } from '../hooks/use-logo' import { useTerminalDimensions } from '../hooks/use-terminal-dimensions' import { useTheme } from '../hooks/use-theme' -import { flushAnalytics } from '../utils/analytics' -import { withTimeout } from '../utils/terminal-color-detection' +import { exitFreebuffCleanly } from '../utils/freebuff-exit' import { getLogoAccentColor, getLogoBlockColor } from '../utils/theme-system' import type { KeyEvent } from '@opentui/core' -/** Cap on analytics flush so a slow network doesn't block process exit. */ -const EXIT_CLEANUP_TIMEOUT_MS = 1000 - /** * Terminal state shown after a 409 session_superseded response. Another CLI on * the same account rotated our instance id and we've stopped polling — the @@ -31,17 +27,12 @@ export const FreebuffSupersededScreen: React.FC = () => { }) // Ctrl+C exits. Stdin is in raw mode, so SIGINT never fires — the key comes - // through as a normal OpenTUI key event. No DELETE needed here: the other - // CLI already rotated our instance id, so our seat (if any) belongs to them. + // through as a normal OpenTUI key event. useKeyboard( useCallback((key: KeyEvent) => { if (key.ctrl && key.name === 'c') { key.preventDefault?.() - withTimeout(flushAnalytics(), EXIT_CLEANUP_TIMEOUT_MS, undefined).finally( - () => { - process.exit(0) - }, - ) + exitFreebuffCleanly() } }, []), ) diff --git a/cli/src/components/session-ended-banner.tsx b/cli/src/components/session-ended-banner.tsx index d1bd71dbd7..e242e58f76 100644 --- a/cli/src/components/session-ended-banner.tsx +++ b/cli/src/components/session-ended-banner.tsx @@ -3,9 +3,8 @@ import { useKeyboard } from '@opentui/react' import React, { useCallback, useState } from 'react' import { Button } from './button' -import { refreshFreebuffSession } from '../hooks/use-freebuff-session' +import { rejoinFreebuffSession } from '../hooks/use-freebuff-session' import { useTheme } from '../hooks/use-theme' -import { useChatStore } from '../state/chat-store' import { BORDER_CHARS } from '../utils/ui-constants' import type { KeyEvent } from '@opentui/core' @@ -18,10 +17,9 @@ interface SessionEndedBannerProps { } /** - * Replaces the chat input when the freebuff session has ended (client state - * `draining` or `ended`). Captures Enter to re-queue the user; Esc keeps - * falling through to the global stream-interrupt handler so in-flight work - * can be cancelled. + * Replaces the chat input when the freebuff session has ended. Captures + * Enter to re-queue the user; Esc keeps falling through to the global + * stream-interrupt handler so in-flight work can be cancelled. */ export const SessionEndedBanner: React.FC = ({ isStreaming, @@ -40,13 +38,7 @@ export const SessionEndedBanner: React.FC = ({ // Once the POST lands, the hook flips status to 'queued' and app.tsx // swaps us into , unmounting this banner. No need to // clear `rejoining` on success — the component will be gone. - refreshFreebuffSession() - .then(() => { - // Wipe the prior conversation so the next admitted session starts - // with empty history instead of continuing the one that just ended. - useChatStore.getState().reset() - }) - .catch(() => setRejoining(false)) + rejoinFreebuffSession().catch(() => setRejoining(false)) }, [canRejoin]) useKeyboard( diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index 73825d0ba0..9eb253e58a 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -6,23 +6,17 @@ import { AdBanner } from './ad-banner' import { Button } from './button' import { ChoiceAdBanner } from './choice-ad-banner' import { ShimmerText } from './shimmer-text' -import { endFreebuffSessionBestEffort } from '../hooks/use-freebuff-session' import { useGravityAd } from '../hooks/use-gravity-ad' import { useLogo } from '../hooks/use-logo' import { useSheenAnimation } from '../hooks/use-sheen-animation' import { useTerminalDimensions } from '../hooks/use-terminal-dimensions' import { useTheme } from '../hooks/use-theme' -import { flushAnalytics } from '../utils/analytics' -import { withTimeout } from '../utils/terminal-color-detection' +import { exitFreebuffCleanly } from '../utils/freebuff-exit' import { getLogoAccentColor, getLogoBlockColor } from '../utils/theme-system' import type { FreebuffSessionResponse } from '../types/freebuff-session' import type { KeyEvent } from '@opentui/core' -/** Cap on exit cleanup (DELETE /session + flushAnalytics) so a slow network - * doesn't block process exit. */ -const EXIT_CLEANUP_TIMEOUT_MS = 1000 - interface WaitingRoomScreenProps { session: FreebuffSessionResponse | null error: string | null @@ -82,30 +76,15 @@ export const WaitingRoomScreen: React.FC = ({ forceStart: true, }) - // Release the seat + flush analytics before exit. Used by both Ctrl+C and - // the top-right X button so they always do the same cleanup. - const handleExit = useCallback(() => { - const cleanup = Promise.allSettled([ - flushAnalytics(), - endFreebuffSessionBestEffort(), - ]) - withTimeout(cleanup, EXIT_CLEANUP_TIMEOUT_MS, undefined).finally(() => { - process.exit(0) - }) - }, []) - // Ctrl+C exits. Stdin is in raw mode, so SIGINT never fires — the key comes - // through as a normal OpenTUI key event. + // through as a normal OpenTUI key event. Shared with the top-right X button. useKeyboard( - useCallback( - (key: KeyEvent) => { - if (key.ctrl && key.name === 'c') { - key.preventDefault?.() - handleExit() - } - }, - [handleExit], - ), + useCallback((key: KeyEvent) => { + if (key.ctrl && key.name === 'c') { + key.preventDefault?.() + exitFreebuffCleanly() + } + }, []), ) const [exitHover, setExitHover] = useState(false) @@ -148,7 +127,7 @@ export const WaitingRoomScreen: React.FC = ({ }} > )} From fdf60ae635ddf3252aec8801f586b26c46398cc3 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 19:05:26 -0700 Subject: [PATCH 29/31] More cleanup --- .../components/freebuff-session-countdown.tsx | 45 ------------------- .../components/freebuff-superseded-screen.tsx | 18 ++------ cli/src/components/waiting-room-screen.tsx | 17 ++----- cli/src/hooks/use-freebuff-ctrl-c-exit.ts | 23 ++++++++++ docs/freebuff-waiting-room.md | 14 +++--- web/src/app/api/v1/chat/completions/_post.ts | 25 +---------- .../session/__tests__/session.test.ts | 1 - .../free-session/__tests__/public-api.test.ts | 8 ++-- .../__tests__/session-view.test.ts | 23 +++------- web/src/server/free-session/admission.ts | 3 +- web/src/server/free-session/config.ts | 11 ++--- web/src/server/free-session/public-api.ts | 22 ++++++--- web/src/server/free-session/session-view.ts | 25 ++++------- 13 files changed, 77 insertions(+), 158 deletions(-) delete mode 100644 cli/src/components/freebuff-session-countdown.tsx create mode 100644 cli/src/hooks/use-freebuff-ctrl-c-exit.ts diff --git a/cli/src/components/freebuff-session-countdown.tsx b/cli/src/components/freebuff-session-countdown.tsx deleted file mode 100644 index 05047a0f21..0000000000 --- a/cli/src/components/freebuff-session-countdown.tsx +++ /dev/null @@ -1,45 +0,0 @@ -import React from 'react' - -import { useNow } from '../hooks/use-now' -import { useTheme } from '../hooks/use-theme' -import { IS_FREEBUFF } from '../utils/constants' - -import type { FreebuffSessionResponse } from '../types/freebuff-session' - -const LOW_THRESHOLD_MS = 60_000 - -const formatRemaining = (ms: number): string => { - if (ms <= 0) return 'expiring…' - const totalSeconds = Math.ceil(ms / 1000) - if (totalSeconds < 60) return `${totalSeconds}s left` - const minutes = Math.floor(totalSeconds / 60) - if (minutes < 60) return `${minutes}m left` - const hours = Math.floor(minutes / 60) - const rem = minutes % 60 - return rem === 0 ? `${hours}h left` : `${hours}h ${rem}m left` -} - -/** - * Small countdown shown while a freebuff session is active. Renders the - * time remaining until the server-issued `expiresAt` so users aren't - * surprised when their seat is released. Returns null in non-freebuff - * builds or when no active session exists — safe to always mount. - */ -export const FreebuffSessionCountdown: React.FC<{ - session: FreebuffSessionResponse | null -}> = ({ session }) => { - const theme = useTheme() - const expiresAtMs = - session?.status === 'active' ? Date.parse(session.expiresAt) : null - - const now = useNow(1000, expiresAtMs !== null) - - if (!IS_FREEBUFF || !expiresAtMs) return null - - const remainingMs = expiresAtMs - now - // Muted until the final minute, then a soft warning — deliberately not - // `theme.error` so the countdown reads informational, not alarming. - const color = remainingMs < LOW_THRESHOLD_MS ? theme.warning : theme.muted - - return {formatRemaining(remainingMs)} -} diff --git a/cli/src/components/freebuff-superseded-screen.tsx b/cli/src/components/freebuff-superseded-screen.tsx index a59ae3e144..c10c22a884 100644 --- a/cli/src/components/freebuff-superseded-screen.tsx +++ b/cli/src/components/freebuff-superseded-screen.tsx @@ -1,15 +1,12 @@ import { TextAttributes } from '@opentui/core' -import { useKeyboard } from '@opentui/react' -import React, { useCallback } from 'react' +import React from 'react' +import { useFreebuffCtrlCExit } from '../hooks/use-freebuff-ctrl-c-exit' import { useLogo } from '../hooks/use-logo' import { useTerminalDimensions } from '../hooks/use-terminal-dimensions' import { useTheme } from '../hooks/use-theme' -import { exitFreebuffCleanly } from '../utils/freebuff-exit' import { getLogoAccentColor, getLogoBlockColor } from '../utils/theme-system' -import type { KeyEvent } from '@opentui/core' - /** * Terminal state shown after a 409 session_superseded response. Another CLI on * the same account rotated our instance id and we've stopped polling — the @@ -26,16 +23,7 @@ export const FreebuffSupersededScreen: React.FC = () => { blockColor, }) - // Ctrl+C exits. Stdin is in raw mode, so SIGINT never fires — the key comes - // through as a normal OpenTUI key event. - useKeyboard( - useCallback((key: KeyEvent) => { - if (key.ctrl && key.name === 'c') { - key.preventDefault?.() - exitFreebuffCleanly() - } - }, []), - ) + useFreebuffCtrlCExit() return ( = ({ forceStart: true, }) - // Ctrl+C exits. Stdin is in raw mode, so SIGINT never fires — the key comes - // through as a normal OpenTUI key event. Shared with the top-right X button. - useKeyboard( - useCallback((key: KeyEvent) => { - if (key.ctrl && key.name === 'c') { - key.preventDefault?.() - exitFreebuffCleanly() - } - }, []), - ) + useFreebuffCtrlCExit() const [exitHover, setExitHover] = useState(false) diff --git a/cli/src/hooks/use-freebuff-ctrl-c-exit.ts b/cli/src/hooks/use-freebuff-ctrl-c-exit.ts new file mode 100644 index 0000000000..84dcb00bad --- /dev/null +++ b/cli/src/hooks/use-freebuff-ctrl-c-exit.ts @@ -0,0 +1,23 @@ +import { useKeyboard } from '@opentui/react' +import { useCallback } from 'react' + +import { exitFreebuffCleanly } from '../utils/freebuff-exit' + +import type { KeyEvent } from '@opentui/core' + +/** + * Bind Ctrl+C on a full-screen freebuff view to `exitFreebuffCleanly`. Stdin + * is in raw mode, so SIGINT never fires — the key arrives as a normal OpenTUI + * key event and we route it through the shared cleanup path (flush analytics, + * release the session seat, then process.exit). + */ +export function useFreebuffCtrlCExit(): void { + useKeyboard( + useCallback((key: KeyEvent) => { + if (key.ctrl && key.name === 'c') { + key.preventDefault?.() + exitFreebuffCleanly() + } + }, []), + ) +} diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 81c120989c..5dfe3d5a99 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -4,7 +4,7 @@ The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployment. It has three jobs: -1. **Drip-admit users** — admit at a steady trickle (default 1 per 15s) so load ramps up gradually rather than stampeding the deployment when the queue is long. +1. **Drip-admit users** — admit at a steady trickle (default 1 per `ADMISSION_TICK_MS`, currently 15s) so load ramps up gradually rather than stampeding the deployment when the queue is long. 2. **Gate on upstream health** — before each admission tick, probe the Fireworks metrics endpoint with a short timeout (`isFireworksAdmissible` in `web/src/server/free-session/admission.ts`). If it doesn't respond OK, admission halts until it does — this is the primary concurrency control, not a static cap. 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. @@ -132,14 +132,13 @@ One pod runs the admission loop at a time, coordinated via Postgres advisory loc Each tick does (in order): 1. **Sweep expired.** `DELETE FROM free_session WHERE status='active' AND expires_at < now() - grace`. Runs regardless of upstream health so zombie sessions are cleaned up even during an outage. -2. **Admit.** `admitFromQueue()` first calls `isFireworksAdmissible()` (short-timeout GET against the Fireworks metrics endpoint). If the probe fails, returns `{ skipped: 'health' }` — admission pauses and the queue grows until recovery. Otherwise opens a transaction, takes `pg_try_advisory_xact_lock(FREEBUFF_ADMISSION_LOCK_ID)`, and `SELECT ... WHERE status='queued' ORDER BY queued_at, user_id LIMIT MAX_ADMITS_PER_TICK FOR UPDATE SKIP LOCKED` → `UPDATE` the rows to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. Staggering at `MAX_ADMITS_PER_TICK=1` / 15s keeps Fireworks from a thundering herd of newly-admitted CLIs. +2. **Admit.** `admitFromQueue()` first calls `isFireworksAdmissible()` (short-timeout GET against the Fireworks metrics endpoint). If the probe fails, returns `{ skipped: 'health' }` — admission pauses and the queue grows until recovery. Otherwise opens a transaction, takes `pg_try_advisory_xact_lock(FREEBUFF_ADMISSION_LOCK_ID)`, and `SELECT ... WHERE status='queued' ORDER BY queued_at, user_id LIMIT 1 FOR UPDATE SKIP LOCKED` → `UPDATE` the row to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. One admit per tick keeps Fireworks from a thundering herd of newly-admitted CLIs. ### Tunables | Constant | Location | Default | Purpose | |---|---|---|---| -| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires | -| `MAX_ADMITS_PER_TICK` | `config.ts` | 1 | Upper bound on admits per tick | +| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. One user is admitted per tick. | | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | | `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | @@ -224,6 +223,7 @@ For free-mode requests (`codebuff_metadata.cost_mode === 'free'`), `_post.ts` ca | HTTP | `error` | When | |---|---|---| +| 426 | `freebuff_update_required` | Request did not include a `freebuff_instance_id` — the client is a pre-waiting-room build. The CLI shows the server-supplied message verbatim. | | 428 | `waiting_room_required` | No session row exists. Client should call POST /session. | | 429 | `waiting_room_queued` | Row exists with `status='queued'`. Client should keep polling GET. | | 409 | `session_superseded` | Claimed `instance_id` does not match stored one — another CLI took over. | @@ -249,13 +249,11 @@ This is a **trust-the-client** design: the server still admits requests during t Computed in `session-view.ts` from the drip-admission rate: ``` -ticksAhead = ceil((position - 1) / maxAdmitsPerTick) -waitMs = ticksAhead * admissionTickMs +waitMs = (position - 1) * admissionTickMs ``` - Position 1 → 0 (next tick admits you) -- Position `maxAdmitsPerTick` + 1 → one tick -- and so on. +- Position 2 → one tick, and so on. This estimate **ignores health-gated pauses**: during a Fireworks incident admission halts entirely, so the actual wait can be longer. We choose to under-report here because showing "unknown" / "indefinite" is worse UX for the common case where the deployment is healthy. diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index b2f420882a..85e10437a9 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -147,6 +147,7 @@ const STATUS_BY_GATE_CODE = { waiting_room_queued: 429, session_superseded: 409, session_expired: 410, + freebuff_update_required: 426, } satisfies Record export async function postChatCompletions(params: { @@ -412,30 +413,8 @@ export async function postChatCompletions(params: { if (isFreeModeRequest) { const claimedInstanceId = typedBody.codebuff_metadata?.freebuff_instance_id - const gate = await checkSession({ - userId, - claimedInstanceId, - }) + const gate = await checkSession({ userId, claimedInstanceId }) if (!gate.ok) { - // Old freebuff clients (pre-waiting-room) never send an instance_id. - // Return a 426 with a clear "please restart to upgrade" message that - // their existing error banner will render verbatim. - if (!claimedInstanceId) { - trackEvent({ - event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR, - userId, - properties: { error: 'freebuff_update_required' }, - logger, - }) - return NextResponse.json( - { - error: 'freebuff_update_required', - message: - 'This version of freebuff is out of date. Please restart freebuff to upgrade and continue using free mode.', - }, - { status: 426 }, - ) - } trackEvent({ event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR, userId, diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index 3881faebad..d9cfb3ea48 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -34,7 +34,6 @@ function makeSessionDeps(overrides: Partial = {}): SessionDeps & { rows, isWaitingRoomEnabled: () => true, admissionTickMs: 15_000, - maxAdmitsPerTick: 1, graceMs: 30 * 60 * 1000, now: () => now, getSessionRow: async (userId) => rows.get(userId) ?? null, diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index 1e32df1a50..2e307d62c9 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -12,7 +12,6 @@ import type { InternalSessionRow } from '../types' const SESSION_LEN = 60 * 60 * 1000 const TICK_MS = 15_000 -const ADMITS_PER_TICK = 1 const GRACE_MS = 30 * 60 * 1000 function makeDeps(overrides: Partial = {}): SessionDeps & { @@ -38,7 +37,6 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { _now: () => currentNow, isWaitingRoomEnabled: () => true, admissionTickMs: TICK_MS, - maxAdmitsPerTick: ADMITS_PER_TICK, graceMs: GRACE_MS, now: () => currentNow, getSessionRow: async (userId) => rows.get(userId) ?? null, @@ -329,7 +327,9 @@ describe('checkSessionAdmissible', () => { expect(result.code).toBe('session_superseded') }) - test('active + missing instance id → session_superseded (fails closed)', async () => { + test('missing instance id → freebuff_update_required (pre-waiting-room CLI)', async () => { + // Classified up front regardless of row state: old clients never send an + // id, so we surface a distinct code that maps to 426 Upgrade Required. await requestSession({ userId: 'u1', deps }) const row = deps.rows.get('u1')! row.status = 'active' @@ -342,7 +342,7 @@ describe('checkSessionAdmissible', () => { deps, }) if (result.ok) throw new Error('unreachable') - expect(result.code).toBe('session_superseded') + expect(result.code).toBe('freebuff_update_required') }) test('active inside grace window → ok with reason=draining', async () => { diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts index 5f9bdac802..57d9d1e7d5 100644 --- a/web/src/server/free-session/__tests__/session-view.test.ts +++ b/web/src/server/free-session/__tests__/session-view.test.ts @@ -5,7 +5,6 @@ import { estimateWaitMs, toSessionStateResponse } from '../session-view' import type { InternalSessionRow } from '../types' const TICK_MS = 15_000 -const ADMITS_PER_TICK = 1 const GRACE_MS = 30 * 60_000 function row(overrides: Partial = {}): InternalSessionRow { @@ -25,26 +24,17 @@ function row(overrides: Partial = {}): InternalSessionRow { describe('estimateWaitMs', () => { test('position 1 → 0 wait (next tick picks you up)', () => { - expect(estimateWaitMs({ position: 1, admissionTickMs: TICK_MS, maxAdmitsPerTick: ADMITS_PER_TICK })).toBe(0) + expect(estimateWaitMs({ position: 1, admissionTickMs: TICK_MS })).toBe(0) }) - test('position N → (N-1) ticks ahead at 1 admit/tick', () => { - expect(estimateWaitMs({ position: 2, admissionTickMs: TICK_MS, maxAdmitsPerTick: 1 })).toBe(TICK_MS) - expect(estimateWaitMs({ position: 10, admissionTickMs: TICK_MS, maxAdmitsPerTick: 1 })).toBe(9 * TICK_MS) - }) - - test('batched admission divides wait', () => { - // 5 admits/tick: positions 2-6 all sit one tick ahead. - expect(estimateWaitMs({ position: 2, admissionTickMs: TICK_MS, maxAdmitsPerTick: 5 })).toBe(TICK_MS) - expect(estimateWaitMs({ position: 6, admissionTickMs: TICK_MS, maxAdmitsPerTick: 5 })).toBe(TICK_MS) - // Position 7 enters the second tick. - expect(estimateWaitMs({ position: 7, admissionTickMs: TICK_MS, maxAdmitsPerTick: 5 })).toBe(2 * TICK_MS) + test('position N → (N-1) ticks ahead', () => { + expect(estimateWaitMs({ position: 2, admissionTickMs: TICK_MS })).toBe(TICK_MS) + expect(estimateWaitMs({ position: 10, admissionTickMs: TICK_MS })).toBe(9 * TICK_MS) }) test('degenerate inputs return 0', () => { - expect(estimateWaitMs({ position: 0, admissionTickMs: TICK_MS, maxAdmitsPerTick: 1 })).toBe(0) - expect(estimateWaitMs({ position: 5, admissionTickMs: 0, maxAdmitsPerTick: 1 })).toBe(0) - expect(estimateWaitMs({ position: 5, admissionTickMs: TICK_MS, maxAdmitsPerTick: 0 })).toBe(0) + expect(estimateWaitMs({ position: 0, admissionTickMs: TICK_MS })).toBe(0) + expect(estimateWaitMs({ position: 5, admissionTickMs: 0 })).toBe(0) }) }) @@ -52,7 +42,6 @@ describe('toSessionStateResponse', () => { const now = new Date('2026-04-17T12:00:00Z') const baseArgs = { admissionTickMs: TICK_MS, - maxAdmitsPerTick: ADMITS_PER_TICK, graceMs: GRACE_MS, } diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts index 428ffd5a79..71c2c97c52 100644 --- a/web/src/server/free-session/admission.ts +++ b/web/src/server/free-session/admission.ts @@ -2,7 +2,6 @@ import { env } from '@codebuff/internal/env' import { ADMISSION_TICK_MS, - MAX_ADMITS_PER_TICK, getSessionGraceMs, getSessionLengthMs, isWaitingRoomEnabled, @@ -153,7 +152,7 @@ export function startFreeSessionAdmission(): boolean { if (typeof interval.unref === 'function') interval.unref() runTick() // fire first tick immediately logger.info( - { tickMs: ADMISSION_TICK_MS, maxAdmitsPerTick: MAX_ADMITS_PER_TICK }, + { tickMs: ADMISSION_TICK_MS }, '[FreeSessionAdmission] Started', ) return true diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index 23302f0bd0..4e9e729c1b 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -7,16 +7,11 @@ import { env } from '@codebuff/internal/env' */ export const FREEBUFF_ADMISSION_LOCK_ID = 573924815 -/** Admission tick cadence. Paired with MAX_ADMITS_PER_TICK=1 this staggers - * admissions so newly-admitted CLIs don't all POST to the - * Fireworks deployment simultaneously. */ +/** Admission tick cadence. Each tick admits at most one user, so this is the + * drip rate: staggering admissions keeps newly-admitted CLIs from all hitting + * Fireworks simultaneously even when a large block of sessions expires at once. */ export const ADMISSION_TICK_MS = 15_000 -/** Max users admitted in a single tick. Staggering matters more than - * throughput here: keeps load on Fireworks smooth even when a - * large block of sessions expires at once. */ -export const MAX_ADMITS_PER_TICK = 1 - export function isWaitingRoomEnabled(): boolean { return env.FREEBUFF_WAITING_ROOM_ENABLED } diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index 1b4b7be919..c3b09b3b0e 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -1,6 +1,5 @@ import { ADMISSION_TICK_MS, - MAX_ADMITS_PER_TICK, getSessionGraceMs, isWaitingRoomEnabled, } from './config' @@ -27,7 +26,6 @@ export interface SessionDeps { * interface uses values rather than thunks so tests can pass numbers * inline without wrapping. */ admissionTickMs: number - maxAdmitsPerTick: number graceMs: number now?: () => Date } @@ -40,7 +38,6 @@ const defaultDeps: SessionDeps = { queuePositionFor, isWaitingRoomEnabled, admissionTickMs: ADMISSION_TICK_MS, - maxAdmitsPerTick: MAX_ADMITS_PER_TICK, get graceMs() { // Read-through getter so test overrides via env still work; the value // itself is materialized once per call. Cheaper than a thunk because @@ -68,7 +65,6 @@ async function viewForRow( position, queueDepth: depth, admissionTickMs: deps.admissionTickMs, - maxAdmitsPerTick: deps.maxAdmitsPerTick, graceMs: deps.graceMs, now: nowOf(deps), }) @@ -160,6 +156,9 @@ export type SessionGateResult = | { ok: false; code: 'waiting_room_queued'; message: string } | { ok: false; code: 'session_superseded'; message: string } | { ok: false; code: 'session_expired'; message: string } + /** Pre-waiting-room CLI that never sends an instance id. Surfaced as a + * distinct code so the caller can prompt the user to restart. */ + | { ok: false; code: 'freebuff_update_required'; message: string } /** * Called from the chat/completions hot path for free-mode requests. Either @@ -180,6 +179,19 @@ export async function checkSessionAdmissible(params: { const deps = params.deps ?? defaultDeps if (!deps.isWaitingRoomEnabled()) return { ok: true, reason: 'disabled' } + // Pre-waiting-room CLIs never send a freebuff_instance_id. Classify that up + // front so the caller gets a distinct code (→ 426 Upgrade Required) and the + // user sees a clear "please restart" message instead of a gate reject they + // can't interpret. + if (!params.claimedInstanceId) { + return { + ok: false, + code: 'freebuff_update_required', + message: + 'This version of freebuff is out of date. Please restart freebuff to upgrade and continue using free mode.', + } + } + const row = await deps.getSessionRow(params.userId) if (!row) { @@ -213,7 +225,7 @@ export async function checkSessionAdmissible(params: { } } - if (!params.claimedInstanceId || params.claimedInstanceId !== row.active_instance_id) { + if (params.claimedInstanceId !== row.active_instance_id) { return { ok: false, code: 'session_superseded', diff --git a/web/src/server/free-session/session-view.ts b/web/src/server/free-session/session-view.ts index e93f65217f..b154e177b3 100644 --- a/web/src/server/free-session/session-view.ts +++ b/web/src/server/free-session/session-view.ts @@ -14,11 +14,10 @@ export function toSessionStateResponse(params: { position: number queueDepth: number admissionTickMs: number - maxAdmitsPerTick: number graceMs: number now: Date }): SessionStateResponse | null { - const { row, position, queueDepth, admissionTickMs, maxAdmitsPerTick, graceMs, now } = params + const { row, position, queueDepth, admissionTickMs, graceMs, now } = params if (!row) return null if (row.status === 'active' && row.expires_at) { @@ -52,11 +51,7 @@ export function toSessionStateResponse(params: { instanceId: row.active_instance_id, position, queueDepth, - estimatedWaitMs: estimateWaitMs({ - position, - admissionTickMs, - maxAdmitsPerTick, - }), + estimatedWaitMs: estimateWaitMs({ position, admissionTickMs }), queuedAt: row.queued_at.toISOString(), } } @@ -66,21 +61,17 @@ export function toSessionStateResponse(params: { } /** - * Wait-time estimate under the drip-admission model: we admit - * `maxAdmitsPerTick` users every `admissionTickMs`, gated by Fireworks - * health. Ignoring health pauses, user at position P waits roughly - * `ceil((P - 1) / maxAdmitsPerTick) * admissionTickMs`. + * Wait-time estimate under the drip-admission model: one user per + * `admissionTickMs`, gated by Fireworks health. Ignoring health pauses, the + * user at position P waits roughly `(P - 1) * admissionTickMs`. * * Position 1 → 0ms (next tick picks you up). - * Position maxAdmitsPerTick+1 → one tick. */ export function estimateWaitMs(params: { position: number admissionTickMs: number - maxAdmitsPerTick: number }): number { - const { position, admissionTickMs, maxAdmitsPerTick } = params - if (position <= 1 || admissionTickMs <= 0 || maxAdmitsPerTick <= 0) return 0 - const ticksAhead = Math.ceil((position - 1) / maxAdmitsPerTick) - return ticksAhead * admissionTickMs + const { position, admissionTickMs } = params + if (position <= 1 || admissionTickMs <= 0) return 0 + return (position - 1) * admissionTickMs } From 59a0e48e94e5c348e9c013d6c8be79cc6e8da580 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 20:09:18 -0700 Subject: [PATCH 30/31] Fix basher test --- agents/__tests__/basher.test.ts | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/agents/__tests__/basher.test.ts b/agents/__tests__/basher.test.ts index 282d5571c4..f83ecb01ae 100644 --- a/agents/__tests__/basher.test.ts +++ b/agents/__tests__/basher.test.ts @@ -59,15 +59,11 @@ describe('commander agent', () => { expect(schema?.params?.required).not.toContain('timeout_seconds') }) - test('has optional rawOutput parameter', () => { + test('has optional what_to_summarize parameter', () => { const schema = commander.inputSchema - const rawOutputProp = schema?.params?.properties?.rawOutput - expect(rawOutputProp && typeof rawOutputProp === 'object' && 'type' in rawOutputProp && rawOutputProp.type).toBe('boolean') - expect(schema?.params?.required).not.toContain('rawOutput') - }) - - test('has prompt parameter', () => { - expect(commander.inputSchema?.prompt?.type).toBe('string') + const summarizeProp = schema?.params?.properties?.what_to_summarize + expect(summarizeProp && typeof summarizeProp === 'object' && 'type' in summarizeProp && summarizeProp.type).toBe('string') + expect(schema?.params?.required).not.toContain('what_to_summarize') }) }) @@ -149,7 +145,7 @@ describe('commander agent', () => { }) }) - test('yields set_output with raw result when rawOutput is true', () => { + test('yields set_output with raw result when what_to_summarize is not provided', () => { const mockAgentState = createMockAgentState() const mockLogger = { debug: () => {}, @@ -161,7 +157,7 @@ describe('commander agent', () => { const generator = commander.handleSteps!({ agentState: mockAgentState, logger: mockLogger as any, - params: { command: 'echo hello', rawOutput: true }, + params: { command: 'echo hello' }, }) // First yield is the command @@ -190,7 +186,7 @@ describe('commander agent', () => { expect(final.done).toBe(true) }) - test('yields STEP for model analysis when rawOutput is false', () => { + test('yields STEP for model analysis when what_to_summarize is provided', () => { const mockAgentState = createMockAgentState() const mockLogger = { debug: () => {}, @@ -202,7 +198,7 @@ describe('commander agent', () => { const generator = commander.handleSteps!({ agentState: mockAgentState, logger: mockLogger as any, - params: { command: 'ls -la', rawOutput: false }, + params: { command: 'ls -la', what_to_summarize: 'list of files' }, }) // First yield is the command @@ -233,7 +229,7 @@ describe('commander agent', () => { const generator = commander.handleSteps!({ agentState: mockAgentState, logger: mockLogger as any, - params: { command: 'echo test', rawOutput: true }, + params: { command: 'echo test' }, }) // First yield is the command @@ -266,7 +262,7 @@ describe('commander agent', () => { const generator = commander.handleSteps!({ agentState: mockAgentState, logger: mockLogger as any, - params: { command: 'echo test', rawOutput: true }, + params: { command: 'echo test' }, }) // First yield is the command From 1aeab98d4977ef015a714c6eb2f3da3c2e662adf Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 18 Apr 2026 21:33:24 -0700 Subject: [PATCH 31/31] Handle old backend result --- cli/src/hooks/use-freebuff-session.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index dc779e057b..d031f69e72 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -44,6 +44,12 @@ async function callSession( headers, signal: opts.signal, }) + // 404 = endpoint not deployed on this server (older web build). Treat as + // "waiting room disabled" so a newer CLI against an older server still + // works, rather than stranding users in a waiting room forever. + if (resp.status === 404) { + return { status: 'disabled' } + } if (!resp.ok) { const text = await resp.text().catch(() => '') throw new Error(