From 633ce94008d1006ddb972b505d3234823c1120a7 Mon Sep 17 00:00:00 2001
From: Chun Fang <chun.fang@amd.com>
Date: Sat, 30 May 2026 11:29:59 +0000
Subject: [PATCH 1/2] glm5.1-fp4-mi355x-sglang: bump SGLang ROCm image to
 v0.5.12.post1-20260529

Picks up the fix for the GSM8K accuracy regression reported in
sgl-project/sglang#25742 (on the v0.5.12-20260517 image, accuracy
dropped to ~0.32 at TP=2).

Local eval-only runs with this new image recover to gsm8k strict-match 0.975
at TP=2/conc=64 and 0.974 at TP=4/conc=16.
---
 .github/configs/amd-master.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index 3544aad49..aba66160b 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -684,7 +684,7 @@ glm5-fp8-mi355x-atom:
       - { tp: 8, conc-start: 4, conc-end: 256 }
 
 glm5.1-fp4-mi355x-sglang:
-  image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415
+  image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529
   model: amd/GLM-5.1-MXFP4
   model-prefix: glm5.1
   runner: mi355x

From 857aeeae4f7441dceacfac46b30ade2a3ba58ada Mon Sep 17 00:00:00 2001
From: Chun Fang <chun.fang@amd.com>
Date: Sat, 30 May 2026 11:41:33 +0000
Subject: [PATCH 2/2] Update Perf-Changelog

---
 perf-changelog.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index c86b1d830..2afe61dbe 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3220,3 +3220,11 @@
   description:
     - "Update GB300 FP4 GLM-5 8k1k low-latency sweep to mirror NVIDIA/srt-slurm#175: add a 5th 1p17d topology (decode_nodes/workers=17), and lower decode max-running-requests / cuda-graph-max-bs / benchmark concurrency per-zip-index from a flat 4096/1024 to 128/64/32/16/1 (mrr & cuda-graph) and 128/64/32/16/12 (concurrency)"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1583
+
+- config-keys:
+    - glm5.1-fp4-mi355x-sglang
+  description:
+    - "Bump SGLang ROCm image from v0.5.10rc0-rocm720-mi35x-20260415 to v0.5.12.post1-rocm720-mi35x-20260529"
+    - "Picks up the fix for the GSM8K accuracy regression reported in sgl-project/sglang#25742 (v0.5.12-20260517 collapsed to ~0.32 at TP=2)"
+    - "Local eval-only runs on MI355X recover to gsm8k strict-match 0.975 at TP=2/conc=64 and 0.974 at TP=4/conc=16, well above the 0.92 upstream gate added in sgl-project/sglang#26396"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1593