77 * to measure how well Fireworks caches the shared prefix across turns.
88 *
99 * Usage:
10- * bun scripts/test-fireworks-long.ts
10+ * bun scripts/test-fireworks-long.ts [model] [--deployment]
11+ *
12+ * Models:
13+ * glm-5.1 (default) — z-ai/glm-5.1
14+ * minimax — minimax/minimax-m2.5
15+ *
16+ * Flags:
17+ * --deployment Use custom deployment instead of serverless (standard API)
18+ * Serverless is the default
1119 */
1220
1321export { }
1422
1523const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
16- const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9'
17- // const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5'
1824
19- // Pricing constants — https://fireworks.ai/pricing
20- const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000
21- const CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
22- const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
/**
 * Settings for one selectable model: a display ID, the Fireworks model IDs for
 * the two API modes, and the per-token prices used for cost reporting.
 */
type ModelConfig = {
  /** OpenRouter-style ID (for display) */
  id: string
  /** Fireworks standard API model ID */
  standardModel: string
  /** Fireworks custom deployment model ID */
  deploymentModel: string
  /** Price of one non-cached input token */
  inputCostPerToken: number
  /** Price of one cached input token */
  cachedInputCostPerToken: number
  /** Price of one output token */
  outputCostPerToken: number
}
33+
/**
 * Supported models, keyed by the short name accepted on the command line.
 * Each price is written as a per-million-token figure divided by 1_000_000
 * so the stored value is a per-token price.
 */
const MODEL_CONFIGS: Record<string, ModelConfig> = {
  'glm-5.1': {
    id: 'z-ai/glm-5.1',
    standardModel: 'accounts/fireworks/models/glm-5p1',
    deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea',
    inputCostPerToken: 1.40 / 1_000_000,
    cachedInputCostPerToken: 0.26 / 1_000_000,
    outputCostPerToken: 4.40 / 1_000_000,
  },
  minimax: {
    id: 'minimax/minimax-m2.5',
    standardModel: 'accounts/fireworks/models/minimax-m2p5',
    deploymentModel: 'accounts/james-65d217/deployments/lnfid5h9',
    inputCostPerToken: 0.30 / 1_000_000,
    cachedInputCostPerToken: 0.03 / 1_000_000,
    outputCostPerToken: 1.20 / 1_000_000,
  },
}
52+
/** Key into MODEL_CONFIGS used when no model name is given. */
const DEFAULT_MODEL = 'glm-5.1'

/**
 * Resolve a model name to its configuration.
 *
 * @param modelArg Key into MODEL_CONFIGS; DEFAULT_MODEL is used when omitted.
 * @returns The matching configuration entry.
 *
 * If the name is not a known model, prints the list of available models to
 * stderr and terminates the process with exit code 1.
 */
function getModelConfig(modelArg?: string): ModelConfig {
  const key = modelArg ?? DEFAULT_MODEL
  // Own-property check: plain indexing would also find inherited members such
  // as "constructor" or "toString" and return them as if they were a config.
  const config = Object.prototype.hasOwnProperty.call(MODEL_CONFIGS, key)
    ? MODEL_CONFIGS[key]
    : undefined
  if (!config) {
    console.error(`❌ Unknown model: "${key}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`)
    process.exit(1)
  }
  return config
}
64+
// `--deployment` anywhere on the command line selects the custom deployment.
const USE_DEPLOYMENT = process.argv.includes('--deployment')
// The model name is the first argument after the first two argv entries that
// is not a flag. NOTE(review): the literal 'long' is also skipped — presumably
// a sub-command word passed by a wrapper invocation; confirm.
const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a !== 'long')
const MODEL = getModelConfig(modelArg)

// Default to serverless (standard API); use --deployment for custom deployment
const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel
const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken
const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken
const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken
2374
2475const MAX_TOKENS = 100
2576
@@ -39,9 +90,9 @@ function computeCost(usage: Record<string, unknown>): { cost: number; breakdown:
3990 const totalCost = inputCost + cachedCost + outputCost
4091
4192 const breakdown = [
42- `${ nonCachedInput } non-cached input × $0.30 /M = $${ inputCost . toFixed ( 8 ) } ` ,
43- `${ cachedTokens } cached input × $0.03 /M = $${ cachedCost . toFixed ( 8 ) } ` ,
44- `${ outputTokens } output × $1.20 /M = $${ outputCost . toFixed ( 8 ) } ` ,
93+ `${ nonCachedInput } non-cached input × $${ ( INPUT_COST_PER_TOKEN * 1_000_000 ) . toFixed ( 2 ) } /M = $${ inputCost . toFixed ( 8 ) } ` ,
94+ `${ cachedTokens } cached input × $${ ( CACHED_INPUT_COST_PER_TOKEN * 1_000_000 ) . toFixed ( 2 ) } /M = $${ cachedCost . toFixed ( 8 ) } ` ,
95+ `${ outputTokens } output × $${ ( OUTPUT_COST_PER_TOKEN * 1_000_000 ) . toFixed ( 2 ) } /M = $${ outputCost . toFixed ( 8 ) } ` ,
4596 `Total: $${ totalCost . toFixed ( 8 ) } ` ,
4697 ] . join ( '\n ' )
4798
@@ -270,11 +321,11 @@ async function main() {
270321
271322 console . log ( '🧪 Fireworks 10-Turn Conversation Caching Test' )
272323 console . log ( '=' . repeat ( 60 ) )
273- console . log ( `Model: ${ FIREWORKS_MODEL } ` )
324+ console . log ( `Model: ${ MODEL . id } ( ${ FIREWORKS_MODEL } ) [ ${ USE_DEPLOYMENT ? 'deployment' : 'serverless' } ] ` )
274325 console . log ( `Base URL: ${ FIREWORKS_BASE_URL } ` )
275326 console . log ( `Max tokens: ${ MAX_TOKENS } (low output per turn)` )
276327 console . log ( `Turns: ${ TURN_PROMPTS . length } ` )
277- console . log ( `Pricing: $0.30 /M input, $0.03 /M cached, $1.20 /M output` )
328+ console . log ( `Pricing: $${ ( INPUT_COST_PER_TOKEN * 1_000_000 ) . toFixed ( 2 ) } /M input, $${ ( CACHED_INPUT_COST_PER_TOKEN * 1_000_000 ) . toFixed ( 2 ) } /M cached, $${ ( OUTPUT_COST_PER_TOKEN * 1_000_000 ) . toFixed ( 2 ) } /M output` )
278329 console . log ( `Session ID: ${ SESSION_ID } (x-session-affinity header)` )
279330 console . log ( '=' . repeat ( 60 ) )
280331 console . log ( )
0 commit comments