Skip to content

Commit e25cde5

Browse files
jahooma and claude committed
Tighten TTFT/queue degraded thresholds; add scrape-check script
p50 TTFT degraded 1000 → 1500ms and p50 generation queue degraded 200 → 300ms, so a healthy deployment running at steady-state 1s TTFT does not trip the admission gate. `scripts/scrape-check.ts` pulls the live Fireworks metrics and prints the same per-deployment health the admission gate sees — useful for tuning thresholds without guessing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 0a1bd36 commit e25cde5

File tree

2 files changed

+60
-6
lines changed

2 files changed

+60
-6
lines changed

web/scripts/scrape-check.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/**
2+
* One-off: scrape Fireworks metrics for each configured deployment and print
3+
* the same health summary the admission gate would see.
4+
*
5+
* Usage:
6+
* bun run web/scripts/scrape-check.ts
7+
*/
8+
9+
import { env } from '@codebuff/internal/env'
10+
11+
import { computeSnapshot, DEFAULT_HEALTH_THRESHOLDS } from '@/server/fireworks-monitor/compute-health'
12+
import { scrapeFireworksMetrics } from '@/server/fireworks-monitor/monitor'
13+
import { FIREWORKS_ACCOUNT_ID, FIREWORKS_DEPLOYMENT_MAP } from '@/llm-api/fireworks-config'
14+
15+
/**
 * Scrapes the live Fireworks metrics once, computes a health snapshot for every
 * deployment in FIREWORKS_DEPLOYMENT_MAP using DEFAULT_HEALTH_THRESHOLDS, and
 * prints the overall status plus a per-deployment breakdown to stdout.
 *
 * @returns A promise that settles once the report has been printed; it rejects
 *   if the scrape or the snapshot computation throws.
 */
async function main(): Promise<void> {
  // Renders a nullable p50 latency as whole milliseconds, or 'n/a' when absent.
  const formatMs = (value: number | null): string =>
    value === null ? 'n/a' : `${Math.round(value)}ms`

  const deployments = Object.values(FIREWORKS_DEPLOYMENT_MAP)
  const metrics = await scrapeFireworksMetrics({
    apiKey: env.FIREWORKS_API_KEY,
    accountId: FIREWORKS_ACCOUNT_ID,
  })
  const snapshot = computeSnapshot({
    metrics,
    deployments,
    thresholds: DEFAULT_HEALTH_THRESHOLDS,
  })

  // A missing scrape timestamp is reported as 'n/a' instead of being coerced
  // to 0, which would print a misleading 1970-01-01 date.
  const scrapedAt =
    snapshot.scrapedAt == null
      ? 'n/a'
      : new Date(snapshot.scrapedAt).toISOString()
  console.log(`scrapedAt: ${scrapedAt}`)
  console.log(`overall: ${snapshot.overall}\n`)

  for (const [deployment, health] of Object.entries(snapshot.deployments)) {
    const stats = health.metrics

    console.log(`── ${deployment} (${health.baseModel ?? 'unknown'})`)
    console.log(` status: ${health.status}`)
    console.log(` replicas: ${stats.replicas}`)
    console.log(` req/s: ${stats.requestRate.toFixed(2)}`)
    // Fractions are shown as percentages.
    console.log(` errors: ${(stats.errorFraction * 100).toFixed(2)}%`)
    console.log(` kvBlocks: ${(stats.kvBlocksFraction * 100).toFixed(1)}%`)
    console.log(` kvSlots: ${(stats.kvSlotsFraction * 100).toFixed(1)}%`)
    console.log(` concurrent: ${stats.concurrentRequests.toFixed(1)}`)
    console.log(` p50 queue: ${formatMs(stats.p50GenerationQueueMs)}`)
    console.log(` p50 TTFT: ${formatMs(stats.p50TimeToFirstTokenMs)}`)

    // Only print the reasons section when there is at least one reason.
    if (health.reasons.length > 0) {
      console.log(` reasons:`)
      for (const reason of health.reasons) console.log(` - ${reason}`)
    }
    console.log()
  }
}
50+
51+
// Run the script. `void` marks the promise as intentionally not awaited; any
// rejection from main() is logged and the process exits with status 1.
void main().catch((failure) => {
  console.error(failure)
  process.exit(1)
})

web/src/server/fireworks-monitor/compute-health.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,12 @@ export const DEFAULT_HEALTH_THRESHOLDS: HealthThresholds = {
4747
minRequestRateForErrorCheck: 0.1,
4848
errorFractionDegraded: 0.02,
4949
errorFractionUnhealthy: 0.1,
50-
kvBlocksFractionDegraded: 0.95,
51-
kvBlocksFractionUnhealthy: 0.99,
52-
generationQueueMsDegraded: 400,
53-
generationQueueMsUnhealthy: 15_000,
54-
ttftMsDegraded: 2_000,
55-
ttftMsUnhealthy: 30_000,
50+
kvBlocksFractionDegraded: 0.85,
51+
kvBlocksFractionUnhealthy: 0.97,
52+
generationQueueMsDegraded: 300,
53+
generationQueueMsUnhealthy: 2_000,
54+
ttftMsDegraded: 1_500,
55+
ttftMsUnhealthy: 10_000,
5656
}
5757

5858
const STATUS_RANK: Record<DeploymentHealthStatus, number> = {

0 commit comments

Comments
 (0)