File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed
web/src/server/free-session Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -18,12 +18,12 @@ export type FireworksHealth = 'healthy' | 'degraded' | 'unhealthy'
18 18 /** Degrade once median prefill-queue latency crosses this bound. Strict by
19 19  * design — a 1s queue on top of ~1s prefill already means users feel 2s+
20 20  * before first token. */
21- export const PREFILL_QUEUE_DEGRADED_MS = 600
21+ export const PREFILL_QUEUE_DEGRADED_MS = 200
22 22
23 23 /** Leading indicator of load — responds instantly to memory pressure, while
24 24  * prefill-queue p50 is a lagging window statistic. Degrading here lets us
25 25  * halt admission *before* users feel it. */
26- export const KV_BLOCKS_DEGRADED_FRACTION = 0.9
26+ export const KV_BLOCKS_DEGRADED_FRACTION = 0.8
27 27
28 28 /** Hard backstop: if KV block memory gets this full, evictions dominate and
29 29  * even the median request will start stalling. */
You can’t perform that action at this time.
0 commit comments