From 633ce94008d1006ddb972b505d3234823c1120a7 Mon Sep 17 00:00:00 2001
From: Chun Fang
Date: Sat, 30 May 2026 11:29:59 +0000
Subject: [PATCH 1/2] glm5.1-fp4-mi355x-sglang: bump SGLang ROCm image to v0.5.12.post1-20260529

Fixes the GSM8K accuracy regression reported in sgl-project/sglang#25742
(v0.5.12-20260517 dropped to ~0.32 at TP=2). Local eval-only runs with
this new image recover to gsm8k strict-match 0.975 at TP=2/conc=64 and
0.974 at TP=4/conc=16.
---
 .github/configs/amd-master.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index 3544aad49..aba66160b 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -684,7 +684,7 @@ glm5-fp8-mi355x-atom:
     - { tp: 8, conc-start: 4, conc-end: 256 }

 glm5.1-fp4-mi355x-sglang:
-  image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415
+  image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529
   model: amd/GLM-5.1-MXFP4
   model-prefix: glm5.1
   runner: mi355x

From 857aeeae4f7441dceacfac46b30ade2a3ba58ada Mon Sep 17 00:00:00 2001
From: Chun Fang
Date: Sat, 30 May 2026 11:41:33 +0000
Subject: [PATCH 2/2] Update Perf-Changelog

---
 perf-changelog.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index c86b1d830..2afe61dbe 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3220,3 +3220,11 @@
   description:
     - "Update GB300 FP4 GLM-5 8k1k low-latency sweep to mirror NVIDIA/srt-slurm#175: add a 5th 1p17d topology (decode_nodes/workers=17), and lower decode max-running-requests / cuda-graph-max-bs / benchmark concurrency per-zip-index from a flat 4096/1024 to 128/64/32/16/1 (mrr & cuda-graph) and 128/64/32/16/12 (concurrency)"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1583
+
+- config-keys:
+    - glm5.1-fp4-mi355x-sglang
+  description:
+    - "Bump SGLang ROCm image from v0.5.10rc0-rocm720-mi35x-20260415 to v0.5.12.post1-rocm720-mi35x-20260529"
+    - "Picks up the fix for the GSM8K accuracy regression reported in sgl-project/sglang#25742 (v0.5.12-20260517 collapsed to ~0.32 at TP=2)"
+    - "Local eval-only runs on MI355X recover to gsm8k strict-match 0.975 at TP=2/conc=64 and 0.974 at TP=4/conc=16, well above the 0.92 upstream gate added in sgl-project/sglang#26396"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1593