Skip to content

Commit 5ddb102

Browse files
jahooma and claude committed
Add freebuff session grace window
Keep admitting requests for FREEBUFF_SESSION_GRACE_MS (default 30m) after a session's expires_at so in-flight agent runs can drain; hard cutoff past that.

Also: replicas=0 → unhealthy, hoist the chat/completions gate status map, fix a stale threshold comment, and fix a pre-existing free-mode test that was missing the checkSessionAdmissible override.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 845bed1 commit 5ddb102

File tree

14 files changed: +185 −32 lines changed

packages/internal/src/env-schema.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export const serverEnvSchema = clientEnvSchema.extend({
4242
.default('false')
4343
.transform((v) => v === 'true'),
4444
FREEBUFF_SESSION_LENGTH_MS: z.coerce.number().int().positive().default(60 * 60 * 1000),
45+
FREEBUFF_SESSION_GRACE_MS: z.coerce.number().int().nonnegative().default(30 * 60 * 1000),
4546
})
4647
export const serverEnvVars = serverEnvSchema.keyof().options
4748
export type ServerEnvVar = (typeof serverEnvVars)[number]
@@ -93,4 +94,5 @@ export const serverProcessEnv: ServerInput = {
9394
// Freebuff waiting room
9495
FREEBUFF_WAITING_ROOM_ENABLED: process.env.FREEBUFF_WAITING_ROOM_ENABLED,
9596
FREEBUFF_SESSION_LENGTH_MS: process.env.FREEBUFF_SESSION_LENGTH_MS,
97+
FREEBUFF_SESSION_GRACE_MS: process.env.FREEBUFF_SESSION_GRACE_MS,
9698
}

web/src/app/api/v1/chat/completions/__tests__/completions.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
583583
fetch: mockFetch,
584584
insertMessageBigquery: mockInsertMessageBigquery,
585585
loggerWithContext: mockLoggerWithContext,
586+
checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
586587
})
587588

588589
expect(response.status).toBe(200)

web/src/app/api/v1/chat/completions/_post.ts

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ import {
6868
OpenRouterError,
6969
} from '@/llm-api/openrouter'
7070
import { checkSessionAdmissible } from '@/server/free-session/public-api'
71+
72+
import type { SessionGateResult } from '@/server/free-session/public-api'
7173
import { extractApiKeyFromHeader } from '@/util/auth'
7274
import { withDefaultProperties } from '@codebuff/common/analytics'
7375
import { checkFreeModeRateLimit } from './free-mode-rate-limiter'
@@ -138,6 +140,15 @@ export const formatQuotaResetCountdown = (
138140

139141
export type CheckSessionAdmissibleFn = typeof checkSessionAdmissible
140142

143+
type GateRejectCode = Extract<SessionGateResult, { ok: false }>['code']
144+
145+
const STATUS_BY_GATE_CODE = {
146+
waiting_room_required: 428,
147+
waiting_room_queued: 429,
148+
session_superseded: 409,
149+
session_expired: 410,
150+
} satisfies Record<GateRejectCode, number>
151+
141152
export async function postChatCompletions(params: {
142153
req: NextRequest
143154
getUserInfoFromApiKey: GetUserInfoFromApiKeyFn
@@ -410,15 +421,9 @@ export async function postChatCompletions(params: {
410421
properties: { error: gate.code },
411422
logger,
412423
})
413-
const statusByCode: Record<string, number> = {
414-
waiting_room_required: 428,
415-
waiting_room_queued: 429,
416-
session_superseded: 409,
417-
session_expired: 410,
418-
}
419424
return NextResponse.json(
420425
{ error: gate.code, message: gate.message },
421-
{ status: statusByCode[gate.code] ?? 429 },
426+
{ status: STATUS_BY_GATE_CODE[gate.code] },
422427
)
423428
}
424429
}

web/src/app/api/v1/freebuff/session/__tests__/session.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ function makeSessionDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
3030
isWaitingRoomEnabled: () => true,
3131
getAdmissionTickMs: () => 15_000,
3232
getMaxAdmitsPerTick: () => 1,
33+
getSessionGraceMs: () => 30 * 60 * 1000,
3334
now: () => now,
3435
getSessionRow: async (userId) => rows.get(userId) ?? null,
3536
queueDepth: async () => [...rows.values()].filter((r) => r.status === 'queued').length,

web/src/server/fireworks-monitor/compute-health.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,8 @@ export interface HealthThresholds {
3838
ttftMsUnhealthy: number
3939
}
4040

41-
// Default thresholds are calibrated to the observed freebuff workload on
42-
// glm-5.1 / kimi-k2.5. They are intentionally loose at first so a cold
43-
// deployment does not flap; expect to tighten once you have a week of
44-
// live data. Override per-instance via startFireworksMonitor({ thresholds }).
41+
// Tuned to trip 'degraded' before users feel it on glm-5.1. Override per-instance
42+
// via startFireworksMonitor({ thresholds }).
4543
export const DEFAULT_HEALTH_THRESHOLDS: HealthThresholds = {
4644
staleSnapshotMs: 3 * 60 * 1000,
4745
minRequestRateForErrorCheck: 0.1,

web/src/server/free-session/__tests__/admission.test.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDep
2222
isFireworksAdmissible: () => true,
2323
getMaxAdmitsPerTick: () => 1,
2424
getSessionLengthMs: () => 60 * 60 * 1000,
25+
getSessionGraceMs: () => 30 * 60 * 1000,
2526
now: () => NOW,
2627
...overrides,
2728
}
@@ -73,4 +74,17 @@ describe('runAdmissionTick', () => {
7374
expect(result.expired).toBe(2)
7475
expect(result.admitted).toBe(1)
7576
})
77+
78+
test('forwards grace ms to sweepExpired', async () => {
79+
const received: number[] = []
80+
const deps = makeAdmissionDeps({
81+
getSessionGraceMs: () => 12_345,
82+
sweepExpired: async (_now, graceMs) => {
83+
received.push(graceMs)
84+
return 0
85+
},
86+
})
87+
await runAdmissionTick(deps)
88+
expect(received).toEqual([12_345])
89+
})
7690
})

web/src/server/free-session/__tests__/public-api.test.ts

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import type { InternalSessionRow } from '../types'
1313
const SESSION_LEN = 60 * 60 * 1000
1414
const TICK_MS = 15_000
1515
const ADMITS_PER_TICK = 1
16+
const GRACE_MS = 30 * 60 * 1000
1617

1718
function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
1819
rows: Map<string, InternalSessionRow>
@@ -38,6 +39,7 @@ function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
3839
isWaitingRoomEnabled: () => true,
3940
getAdmissionTickMs: () => TICK_MS,
4041
getMaxAdmitsPerTick: () => ADMITS_PER_TICK,
42+
getSessionGraceMs: () => GRACE_MS,
4143
now: () => currentNow,
4244
getSessionRow: async (userId) => rows.get(userId) ?? null,
4345
endSession: async (userId) => {
@@ -250,12 +252,30 @@ describe('checkSessionAdmissible', () => {
250252
expect(result.code).toBe('session_superseded')
251253
})
252254

253-
test('active but expires_at in the past → session_expired', async () => {
255+
test('active inside grace window → ok with reason=draining', async () => {
256+
await requestSession({ userId: 'u1', deps })
257+
const row = deps.rows.get('u1')!
258+
row.status = 'active'
259+
row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000)
260+
// 1 minute past expiry, well within the 30-minute grace window
261+
row.expires_at = new Date(deps._now().getTime() - 60_000)
262+
263+
const result = await checkSessionAdmissible({
264+
userId: 'u1',
265+
claimedInstanceId: row.active_instance_id,
266+
deps,
267+
})
268+
expect(result.ok).toBe(true)
269+
if (!result.ok || result.reason !== 'draining') throw new Error('unreachable')
270+
expect(result.gracePeriodRemainingMs).toBe(GRACE_MS - 60_000)
271+
})
272+
273+
test('active past the grace window → session_expired', async () => {
254274
await requestSession({ userId: 'u1', deps })
255275
const row = deps.rows.get('u1')!
256276
row.status = 'active'
257277
row.admitted_at = new Date(deps._now().getTime() - 2 * SESSION_LEN)
258-
row.expires_at = new Date(deps._now().getTime() - 1)
278+
row.expires_at = new Date(deps._now().getTime() - GRACE_MS - 1)
259279

260280
const result = await checkSessionAdmissible({
261281
userId: 'u1',
@@ -265,6 +285,22 @@ describe('checkSessionAdmissible', () => {
265285
if (result.ok) throw new Error('unreachable')
266286
expect(result.code).toBe('session_expired')
267287
})
288+
289+
test('draining + wrong instance id still rejects with session_superseded', async () => {
290+
await requestSession({ userId: 'u1', deps })
291+
const row = deps.rows.get('u1')!
292+
row.status = 'active'
293+
row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000)
294+
row.expires_at = new Date(deps._now().getTime() - 60_000)
295+
296+
const result = await checkSessionAdmissible({
297+
userId: 'u1',
298+
claimedInstanceId: 'stale-token',
299+
deps,
300+
})
301+
if (result.ok) throw new Error('unreachable')
302+
expect(result.code).toBe('session_superseded')
303+
})
268304
})
269305

270306
describe('endUserSession', () => {

web/src/server/free-session/__tests__/session-view.test.ts

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import type { InternalSessionRow } from '../types'
66

77
const TICK_MS = 15_000
88
const ADMITS_PER_TICK = 1
9+
const GRACE_MS = 30 * 60_000
910

1011
function row(overrides: Partial<InternalSessionRow> = {}): InternalSessionRow {
1112
const now = new Date('2026-04-17T12:00:00Z')
@@ -52,6 +53,7 @@ describe('toSessionStateResponse', () => {
5253
const baseArgs = {
5354
admissionTickMs: TICK_MS,
5455
maxAdmitsPerTick: ADMITS_PER_TICK,
56+
graceMs: GRACE_MS,
5557
}
5658

5759
test('returns null when row is null', () => {
@@ -102,9 +104,33 @@ describe('toSessionStateResponse', () => {
102104
})
103105
})
104106

105-
test('active but expired row maps to null (caller should re-queue)', () => {
107+
test('active row inside grace window maps to draining response', () => {
108+
const admittedAt = new Date(now.getTime() - 65 * 60_000)
109+
const expiresAt = new Date(now.getTime() - 5 * 60_000) // 5 min past expiry
106110
const view = toSessionStateResponse({
107-
row: row({ status: 'active', admitted_at: now, expires_at: new Date(now.getTime() - 1) }),
111+
row: row({ status: 'active', admitted_at: admittedAt, expires_at: expiresAt }),
112+
position: 0,
113+
queueDepth: 0,
114+
...baseArgs,
115+
now,
116+
})
117+
expect(view).toEqual({
118+
status: 'draining',
119+
instanceId: 'inst-1',
120+
admittedAt: admittedAt.toISOString(),
121+
expiresAt: expiresAt.toISOString(),
122+
gracePeriodEndsAt: new Date(expiresAt.getTime() + GRACE_MS).toISOString(),
123+
gracePeriodRemainingMs: GRACE_MS - 5 * 60_000,
124+
})
125+
})
126+
127+
test('active row past the grace window maps to null (caller should re-queue)', () => {
128+
const view = toSessionStateResponse({
129+
row: row({
130+
status: 'active',
131+
admitted_at: now,
132+
expires_at: new Date(now.getTime() - GRACE_MS - 1),
133+
}),
108134
position: 0,
109135
queueDepth: 0,
110136
...baseArgs,

web/src/server/free-session/admission.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import {
22
ADMISSION_TICK_MS,
33
MAX_ADMITS_PER_TICK,
4+
getSessionGraceMs,
45
getSessionLengthMs,
56
isWaitingRoomEnabled,
67
} from './config'
@@ -23,7 +24,7 @@ let state: AdmissionState | null = null
2324
const SNAPSHOT_EVERY_N_TICKS = 10
2425

2526
export interface AdmissionDeps {
26-
sweepExpired: (now: Date) => Promise<number>
27+
sweepExpired: (now: Date, graceMs: number) => Promise<number>
2728
countActive: (now: Date) => Promise<number>
2829
queueDepth: () => Promise<number>
2930
admitFromQueue: (params: {
@@ -34,6 +35,7 @@ export interface AdmissionDeps {
3435
isFireworksAdmissible: () => boolean
3536
getMaxAdmitsPerTick: () => number
3637
getSessionLengthMs: () => number
38+
getSessionGraceMs: () => number
3739
now?: () => Date
3840
}
3941

@@ -45,6 +47,7 @@ const defaultDeps: AdmissionDeps = {
4547
isFireworksAdmissible,
4648
getMaxAdmitsPerTick: () => MAX_ADMITS_PER_TICK,
4749
getSessionLengthMs,
50+
getSessionGraceMs,
4851
}
4952

5053
export interface AdmissionTickResult {
@@ -73,7 +76,7 @@ export async function runAdmissionTick(
7376
deps: AdmissionDeps = defaultDeps,
7477
): Promise<AdmissionTickResult> {
7578
const now = (deps.now ?? (() => new Date()))()
76-
const expired = await deps.sweepExpired(now)
79+
const expired = await deps.sweepExpired(now, deps.getSessionGraceMs())
7780

7881
if (!deps.isFireworksAdmissible()) {
7982
const [active, depth] = await Promise.all([

web/src/server/free-session/config.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,11 @@ export function isWaitingRoomEnabled(): boolean {
2424
export function getSessionLengthMs(): number {
2525
return env.FREEBUFF_SESSION_LENGTH_MS
2626
}
27+
28+
/** Drain window after a session's `expires_at`. During this window the gate
29+
* still admits requests so an in-flight agent run can finish, but the CLI is
30+
* expected to stop accepting new user prompts. Hard cutoff at
31+
* `expires_at + grace`; past that the gate returns `session_expired`. */
32+
export function getSessionGraceMs(): number {
33+
return env.FREEBUFF_SESSION_GRACE_MS
34+
}

0 commit comments

Comments
 (0)