Skip to content

Commit dd44f02

Browse files
committed
Update test fireworks to have model/deployment options
1 parent c0a776f commit dd44f02

File tree

1 file changed

+63
-12
lines changed

1 file changed

+63
-12
lines changed

scripts/test-fireworks-long.ts

Lines changed: 63 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,70 @@
77
* to measure how well Fireworks caches the shared prefix across turns.
88
*
99
* Usage:
10-
* bun scripts/test-fireworks-long.ts
10+
* bun scripts/test-fireworks-long.ts [model] [--deployment]
11+
*
12+
* Models:
13+
* glm-5.1 (default) — z-ai/glm-5.1
14+
* minimax — minimax/minimax-m2.5
15+
*
16+
* Flags:
17+
* --deployment Use custom deployment instead of serverless (standard API)
18+
* Serverless is the default
1119
*/
1220

1321
export { }
1422

1523
const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
16-
const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9'
17-
// const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5'
1824

19-
// Pricing constants — https://fireworks.ai/pricing
20-
const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000
21-
const CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
22-
const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
25+
/**
 * Settings for one selectable model: the identifiers used to address it on
 * Fireworks and its pricing. Prices are stored per single token (written
 * below as dollars-per-million divided by 1_000_000).
 */
type ModelConfig = {
  id: string // OpenRouter-style ID (for display)
  standardModel: string // Fireworks standard API model ID
  deploymentModel: string // Fireworks custom deployment model ID
  inputCostPerToken: number
  cachedInputCostPerToken: number
  outputCostPerToken: number
}

/** Supported models, keyed by the short name accepted as the CLI model argument. */
const MODEL_CONFIGS: Record<string, ModelConfig> = {
  'glm-5.1': {
    id: 'z-ai/glm-5.1',
    standardModel: 'accounts/fireworks/models/glm-5p1',
    deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea',
    inputCostPerToken: 1.40 / 1_000_000,
    cachedInputCostPerToken: 0.26 / 1_000_000,
    outputCostPerToken: 4.40 / 1_000_000,
  },
  minimax: {
    id: 'minimax/minimax-m2.5',
    standardModel: 'accounts/fireworks/models/minimax-m2p5',
    deploymentModel: 'accounts/james-65d217/deployments/lnfid5h9',
    inputCostPerToken: 0.30 / 1_000_000,
    cachedInputCostPerToken: 0.03 / 1_000_000,
    outputCostPerToken: 1.20 / 1_000_000,
  },
}

/** Key into MODEL_CONFIGS used when no model argument is given. */
const DEFAULT_MODEL = 'glm-5.1'

/**
 * Look up the configuration for a model short name.
 *
 * @param modelArg Short model name (a key of MODEL_CONFIGS). When omitted,
 *   DEFAULT_MODEL is used.
 * @returns The matching ModelConfig.
 *
 * If the name is not a known model, prints an error listing the available
 * names and terminates the process with exit code 1.
 */
function getModelConfig(modelArg?: string): ModelConfig {
  const key = modelArg ?? DEFAULT_MODEL
  // Only accept keys defined directly on MODEL_CONFIGS. A bare index lookup
  // would also resolve inherited Object.prototype members such as
  // "constructor" or "toString", which are truthy and would slip past the
  // unknown-model check below.
  const config = Object.prototype.hasOwnProperty.call(MODEL_CONFIGS, key)
    ? MODEL_CONFIGS[key]
    : undefined
  if (!config) {
    console.error(`❌ Unknown model: "${key}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`)
    process.exit(1)
  }
  return config
}
64+
65+
// --- CLI parsing -----------------------------------------------------------
// `--deployment` anywhere on the command line switches to the custom deployment.
const USE_DEPLOYMENT = process.argv.includes('--deployment')

// The model name is the first argument after the script path (argv index 2+)
// that is not a flag. The literal 'long' is skipped as well.
// NOTE(review): 'long' is presumably a subcommand token passed through by a
// wrapper script — confirm against how this script is invoked.
const modelArg = process.argv
  .slice(2)
  .find((arg) => arg !== 'long' && !arg.startsWith('-'))

// Resolve the selected model (falls back to the default when no name was given).
const MODEL = getModelConfig(modelArg)

// Serverless (standard API) is the default target; the custom deployment is opt-in.
const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel

// Per-token prices of the selected model, exposed under the constant names
// used by the rest of the script.
const {
  inputCostPerToken: INPUT_COST_PER_TOKEN,
  cachedInputCostPerToken: CACHED_INPUT_COST_PER_TOKEN,
  outputCostPerToken: OUTPUT_COST_PER_TOKEN,
} = MODEL
2374

2475
const MAX_TOKENS = 100
2576

@@ -39,9 +90,9 @@ function computeCost(usage: Record<string, unknown>): { cost: number; breakdown:
3990
const totalCost = inputCost + cachedCost + outputCost
4091

4192
const breakdown = [
42-
`${nonCachedInput} non-cached input × $0.30/M = $${inputCost.toFixed(8)}`,
43-
`${cachedTokens} cached input × $0.03/M = $${cachedCost.toFixed(8)}`,
44-
`${outputTokens} output × $1.20/M = $${outputCost.toFixed(8)}`,
93+
`${nonCachedInput} non-cached input × $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${inputCost.toFixed(8)}`,
94+
`${cachedTokens} cached input × $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${cachedCost.toFixed(8)}`,
95+
`${outputTokens} output × $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${outputCost.toFixed(8)}`,
4596
`Total: $${totalCost.toFixed(8)}`,
4697
].join('\n ')
4798

@@ -270,11 +321,11 @@ async function main() {
270321

271322
console.log('🧪 Fireworks 10-Turn Conversation Caching Test')
272323
console.log('='.repeat(60))
273-
console.log(`Model: ${FIREWORKS_MODEL}`)
324+
console.log(`Model: ${MODEL.id} (${FIREWORKS_MODEL}) [${USE_DEPLOYMENT ? 'deployment' : 'serverless'}]`)
274325
console.log(`Base URL: ${FIREWORKS_BASE_URL}`)
275326
console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`)
276327
console.log(`Turns: ${TURN_PROMPTS.length}`)
277-
console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`)
328+
console.log(`Pricing: $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M input, $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M cached, $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M output`)
278329
console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`)
279330
console.log('='.repeat(60))
280331
console.log()

0 commit comments

Comments
 (0)