Skip to content

Commit fd95c05

Browse files
committed
Fireworks health monitor
1 parent e921882 commit fd95c05

File tree

14 files changed

+1540
-7
lines changed

14 files changed

+1540
-7
lines changed

scripts/check-fireworks-health.ts

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#!/usr/bin/env bun
2+
3+
/**
4+
* Scrape Fireworks metrics once and print the health snapshot the
5+
* web server's monitor would produce. Useful for ad-hoc verification.
6+
*
7+
* Usage:
8+
* bun scripts/check-fireworks-health.ts
9+
* bun scripts/check-fireworks-health.ts --raw # also print a per-metric-name sample count breakdown
10+
* bun scripts/check-fireworks-health.ts --json # machine-readable output
11+
*
12+
* Reads FIREWORKS_API_KEY from env (.env.local is loaded automatically by bun).
13+
*/
14+
15+
import { computeSnapshot, DEFAULT_HEALTH_THRESHOLDS } from '../web/src/server/fireworks-monitor/compute-health'
16+
import { parsePrometheusText } from '../web/src/server/fireworks-monitor/parse-prometheus'
17+
import {
18+
FIREWORKS_ACCOUNT_ID,
19+
FIREWORKS_DEPLOYMENT_MAP,
20+
} from '../web/src/llm-api/fireworks-config'
21+
22+
import type { DeploymentHealthStatus } from '../web/src/server/fireworks-monitor/types'
23+
24+
/**
 * Build the Fireworks metrics endpoint URL for an account.
 *
 * The account id is percent-encoded because it may be supplied through the
 * FIREWORKS_ACCOUNT_ID environment variable: a stray `/`, `?` or `#` in that
 * value must not be able to alter the path the API key is sent to.
 *
 * @param accountId Fireworks account id (without the `accounts/` prefix).
 * @returns Absolute URL of the account's metrics endpoint.
 */
const METRICS_URL = (accountId: string): string =>
  `https://api.fireworks.ai/v1/accounts/${encodeURIComponent(accountId)}/metrics`
26+
27+
/**
 * Fetch the Fireworks metrics endpoint for one account and parse the
 * response body with `parsePrometheusText`.
 *
 * @param params.apiKey    Sent as a Bearer token in the Authorization header.
 * @param params.accountId Account whose metrics URL is requested.
 * @returns Whatever `parsePrometheusText` produces for the response text.
 * @throws Error when the HTTP response is not OK; the message carries the
 *         status line and up to the first 300 characters of the body.
 */
async function scrapeFireworksMetrics(params: { apiKey: string; accountId: string }) {
  const { apiKey, accountId } = params
  const res = await fetch(METRICS_URL(accountId), {
    headers: { Authorization: `Bearer ${apiKey}` },
  })

  if (res.ok) {
    const payload = await res.text()
    return parsePrometheusText(payload)
  }

  // Reading the error body is best-effort; a failure here must not mask the
  // HTTP status we are about to report.
  let detail = ''
  try {
    detail = await res.text()
  } catch {
    detail = ''
  }
  const suffix = detail ? ` — ${detail.slice(0, 300)}` : ''
  throw new Error(`Fireworks metrics scrape failed: ${res.status} ${res.statusText}${suffix}`)
}
40+
41+
// ANSI reset sequence, written after every colored status label.
const RESET = '\x1b[0m'

// ANSI foreground color escape used when printing each health status.
const STATUS_COLORS: Record<DeploymentHealthStatus, string> = {
  unknown: '\x1b[90m', // gray
  unhealthy: '\x1b[31m', // red
  degraded: '\x1b[33m', // yellow
  healthy: '\x1b[32m', // green
}
48+
49+
/**
 * Render a millisecond duration for display.
 *
 * @param value Duration in milliseconds, or `null` when no value is available.
 * @returns `'n/a'` for `null` or a non-finite number, seconds with two
 *          decimals (e.g. `1.50s`) once the rounded value reaches one second,
 *          otherwise whole milliseconds (e.g. `12ms`).
 */
function formatMs(value: number | null): string {
  if (value === null || !Number.isFinite(value)) return 'n/a'
  // Pick the unit from the rounded value so 999.5–999.99 is shown as "1.00s"
  // instead of the inconsistent "1000ms".
  const roundedMs = Math.round(value)
  if (roundedMs >= 1000) return `${(value / 1000).toFixed(2)}s`
  return `${roundedMs}ms`
}
54+
55+
/**
 * Render a fraction as a percentage string.
 *
 * @param value  Fraction to display (0.25 is rendered as 25%).
 * @param digits Number of decimals to keep; defaults to 1.
 * @returns The percentage followed by `%`, or `'n/a'` when `value` is NaN or
 *          infinite, so the report never shows "NaN%".
 */
function formatPct(value: number, digits = 1): string {
  if (!Number.isFinite(value)) return 'n/a'
  return `${(value * 100).toFixed(digits)}%`
}
58+
59+
/**
 * CLI entry point: scrape Fireworks metrics once, compute the health
 * snapshot, and print it as a report or (with `--json`) as JSON.
 *
 * Flags read from `process.argv`:
 *   --json  print `{ scrapeElapsedMs, sampleCount, snapshot }` as JSON and
 *           skip the human-readable report
 *   --raw   after the report, print how many samples each metric name has
 *
 * Exit codes: 1 when FIREWORKS_API_KEY is missing or the scrape fails, 2 when
 * the overall status is `unhealthy`, 0 otherwise. The 2/0 code applies to both
 * output modes so scripts consuming `--json` can also rely on it.
 */
async function main() {
  const args = process.argv.slice(2)
  const jsonMode = args.includes('--json')
  const showRaw = args.includes('--raw')

  const apiKey = process.env.FIREWORKS_API_KEY
  if (!apiKey) {
    console.error('❌ FIREWORKS_API_KEY is not set. Add it to .env.local or export it.')
    process.exit(1)
  }

  // The account id may be overridden from the environment; the deployment
  // list always comes from the static config.
  const accountId = process.env.FIREWORKS_ACCOUNT_ID ?? FIREWORKS_ACCOUNT_ID
  const deployments = Object.values(FIREWORKS_DEPLOYMENT_MAP)

  const scrapeStart = Date.now()
  let metrics
  try {
    metrics = await scrapeFireworksMetrics({ apiKey, accountId })
  } catch (error) {
    console.error('❌ Scrape failed:', error instanceof Error ? error.message : error)
    process.exit(1)
  }
  const scrapeElapsedMs = Date.now() - scrapeStart

  const snapshot = computeSnapshot({
    metrics,
    deployments,
    thresholds: DEFAULT_HEALTH_THRESHOLDS,
  })

  // Computed once so the JSON and report paths signal health identically.
  const exitCode = snapshot.overall === 'unhealthy' ? 2 : 0

  if (jsonMode) {
    console.log(JSON.stringify({ scrapeElapsedMs, sampleCount: metrics.samples.length, snapshot }, null, 2))
    // Use process.exitCode (not process.exit) so the process ends on its own
    // after the JSON has been written out, e.g. when stdout is a pipe.
    process.exitCode = exitCode
    return
  }

  console.log('🔥 Fireworks Deployment Health')
  console.log('='.repeat(78))
  console.log(`Account: accounts/${accountId}`)
  console.log(`Scraped in: ${scrapeElapsedMs}ms`)
  console.log(`Samples: ${metrics.samples.length}`)
  console.log(`Overall: ${STATUS_COLORS[snapshot.overall]}${snapshot.overall.toUpperCase()}${RESET}`)
  if (snapshot.lastError) console.log(`Last error: ${snapshot.lastError}`)
  console.log()

  // Invert the model → deployment map so each deployment can be labeled with
  // the model it serves.
  const modelByDeployment = Object.fromEntries(
    Object.entries(FIREWORKS_DEPLOYMENT_MAP).map(([model, dep]) => [dep, model]),
  )

  for (const [deployment, health] of Object.entries(snapshot.deployments)) {
    const model = modelByDeployment[deployment] ?? '(unknown model)'
    const color = STATUS_COLORS[health.status]
    console.log(`── ${color}${health.status.toUpperCase().padEnd(9)}${RESET} ${model}`)
    console.log(` deployment: ${deployment}`)
    console.log(` base model: ${health.baseModel ?? 'n/a'}`)
    console.log(` request rate: ${health.metrics.requestRate.toFixed(3)} req/s`)
    console.log(` error rate: ${health.metrics.errorRate.toFixed(3)} err/s (${formatPct(health.metrics.errorFraction)})`)
    console.log(` concurrent requests: ${health.metrics.concurrentRequests.toFixed(2)}`)
    console.log(` KV blocks utilization: ${formatPct(health.metrics.kvBlocksFraction, 0)}`)
    console.log(` KV slots utilization: ${formatPct(health.metrics.kvSlotsFraction, 0)}`)
    console.log(` p50 queue wait: ${formatMs(health.metrics.p50GenerationQueueMs)}`)
    console.log(` p50 TTFT: ${formatMs(health.metrics.p50TimeToFirstTokenMs)}`)
    if (health.reasons.length > 0) {
      console.log(` reasons: ${health.reasons.join('; ')}`)
    }
    console.log()
  }

  if (showRaw) {
    console.log('── Metric name breakdown ─────────────────────────────')
    // Tally samples per metric name, then list the most frequent first.
    const counts = new Map<string, number>()
    for (const s of metrics.samples) {
      counts.set(s.name, (counts.get(s.name) ?? 0) + 1)
    }
    const sorted = [...counts.entries()].sort((a, b) => b[1] - a[1])
    for (const [name, count] of sorted) {
      console.log(` ${String(count).padStart(4)} ${name}`)
    }
  }

  process.exit(exitCode)
}
140+
141+
// Handle any unexpected rejection from main() explicitly instead of leaving a
// floating promise: log the error (with its stack) and exit non-zero.
main().catch((error: unknown) => {
  console.error('❌ Unexpected error:', error)
  process.exit(1)
})

web/instrumentation.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* causing Render's proxy to return 502 Bad Gateway errors.
99
*/
1010

11+
import { startFireworksMonitor } from '@/server/fireworks-monitor/monitor'
1112
import { logger } from '@/util/logger'
1213

1314
export function register() {
@@ -45,4 +46,6 @@ export function register() {
4546
})
4647

4748
logger.info({}, '[Instrumentation] Global error handlers registered')
49+
50+
startFireworksMonitor()
4851
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import { describe, expect, test } from 'bun:test'
2+
import { NextResponse } from 'next/server'
3+
4+
import { getFireworksHealth } from '../_get'
5+
6+
import type { FireworksHealthSnapshot } from '@/server/fireworks-monitor/types'
7+
8+
/**
 * Test fixture: a snapshot with the given overall status, no deployments,
 * no error, and fixed timestamps.
 */
function snapshot(overall: FireworksHealthSnapshot['overall']): FireworksHealthSnapshot {
  const fixture: FireworksHealthSnapshot = {
    overall,
    scrapedAt: 1000,
    ageMs: 0,
    lastError: null,
    deployments: {},
  }
  return fixture
}
19+
20+
// Auth stub for an accepted caller: resolves to a plain user object.
const allowAdmin = async () => {
  return { id: 'admin-user', email: 'admin@example.com' }
}

// Auth stub for a rejected caller: resolves to a 403 NextResponse.
const forbidAdmin = async () => {
  const rejection = NextResponse.json({ error: 'Forbidden - not an admin' }, { status: 403 })
  return rejection
}
23+
24+
describe('/api/admin/fireworks-health', () => {
  // A rejected auth check must short-circuit with the auth response's status.
  test('returns 403 when caller is not an admin', async () => {
    const res = await getFireworksHealth({
      getSnapshot: () => snapshot('healthy'),
      checkAdminAuth: forbidAdmin,
    })
    expect(res.status).toBe(403)
  })

  // The healthy case additionally checks that the snapshot is the JSON body.
  test('returns 200 with snapshot when overall is healthy', async () => {
    const res = await getFireworksHealth({
      getSnapshot: () => snapshot('healthy'),
      checkAdminAuth: allowAdmin,
    })
    expect(res.status).toBe(200)
    const payload = await res.json()
    expect(payload.overall).toBe('healthy')
  })

  // Remaining overall statuses only differ in the expected HTTP status.
  const statusCases = [
    ['returns 200 when degraded', 'degraded', 200],
    ['returns 200 when unknown (no scrape yet)', 'unknown', 200],
    ['returns 503 when overall is unhealthy', 'unhealthy', 503],
  ] as const

  for (const [title, overall, expectedStatus] of statusCases) {
    test(title, async () => {
      const res = await getFireworksHealth({
        getSnapshot: () => snapshot(overall),
        checkAdminAuth: allowAdmin,
      })
      expect(res.status).toBe(expectedStatus)
    })
  }
})
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import { NextResponse } from 'next/server'
2+
3+
import type { FireworksHealthSnapshot } from '@/server/fireworks-monitor/types'
4+
5+
/** Dependencies injected into `getFireworksHealth` so it can be tested in isolation. */
export interface FireworksHealthDeps {
  /** Returns the health snapshot that is serialized as the response body. */
  getSnapshot: () => FireworksHealthSnapshot

  /**
   * Admin check. Resolving to a `NextResponse` rejects the request with that
   * response; any other resolved value lets the request proceed.
   */
  checkAdminAuth: () => Promise<unknown>
}
9+
10+
/**
 * Handle a request for the Fireworks health snapshot.
 *
 * Runs the admin check first; if it resolves to a `NextResponse`, that
 * response is returned unchanged. Otherwise the current snapshot is returned
 * as JSON with status 503 when `overall` is `'unhealthy'` and 200 for every
 * other status.
 */
export async function getFireworksHealth(deps: FireworksHealthDeps) {
  const { getSnapshot, checkAdminAuth } = deps

  const auth = await checkAdminAuth()
  if (!(auth instanceof NextResponse)) {
    const current = getSnapshot()
    const status = current.overall === 'unhealthy' ? 503 : 200
    return NextResponse.json(current, { status })
  }

  return auth
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import { getFireworksHealth } from './_get'
2+
3+
import { checkAdminAuth } from '@/lib/admin-auth'
4+
import { getFireworksHealthSnapshot } from '@/server/fireworks-monitor/monitor'
5+
6+
/** GET route handler: wires the real admin check and monitor snapshot into `getFireworksHealth`. */
export const GET = () =>
  getFireworksHealth({
    checkAdminAuth,
    getSnapshot: getFireworksHealthSnapshot,
  })
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/**
2+
* Static Fireworks deployment config.
3+
*
4+
* Kept in its own module (no imports) so it is safe to pull into edge-runtime
5+
* code paths — e.g. instrumentation.ts — without dragging in the server-only
6+
* modules that fireworks.ts transitively depends on (bigquery, undici, etc).
7+
*/
8+
9+
/** Fireworks account that owns the custom deployments below. */
export const FIREWORKS_ACCOUNT_ID = 'james-65d217'

/**
 * Build a deployment resource path under FIREWORKS_ACCOUNT_ID so the account
 * id is written in exactly one place.
 */
const deploymentPath = (deploymentId: string): string =>
  `accounts/${FIREWORKS_ACCOUNT_ID}/deployments/${deploymentId}`

/** Custom deployment IDs for models with dedicated Fireworks deployments, keyed by model id. */
export const FIREWORKS_DEPLOYMENT_MAP: Record<string, string> = {
  // 'minimax/minimax-m2.5': deploymentPath('lnfid5h9'),
  'moonshotai/kimi-k2.5': deploymentPath('mx8l5rq2'),
  'z-ai/glm-5.1': deploymentPath('mjb4i7ea'),
}

web/src/llm-api/fireworks.ts

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { PROFIT_MARGIN } from '@codebuff/common/constants/limits'
44
import { getErrorObject } from '@codebuff/common/util/error'
55
import { env } from '@codebuff/internal/env'
66

7+
import { FIREWORKS_DEPLOYMENT_MAP } from './fireworks-config'
78
import {
89
consumeCreditsForMessage,
910
extractRequestMetadata,
@@ -37,13 +38,6 @@ const FIREWORKS_MODEL_MAP: Record<string, string> = {
3738
/** Flag to enable custom Fireworks deployments (set to false to use global API only) */
3839
const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true
3940

40-
/** Custom deployment IDs for models with dedicated Fireworks deployments */
41-
const FIREWORKS_DEPLOYMENT_MAP: Record<string, string> = {
42-
// 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9',
43-
'moonshotai/kimi-k2.5': 'accounts/james-65d217/deployments/mx8l5rq2',
44-
'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea',
45-
}
46-
4741
/** Check if current time is within deployment hours (always enabled) */
4842
export function isDeploymentHours(_now: Date = new Date()): boolean {
4943
return true

0 commit comments

Comments
 (0)