From 0c29cf508d72e4609b68c58120d948bd3558d84b Mon Sep 17 00:00:00 2001
From: Kevin Wang
Date: Thu, 23 Apr 2026 06:47:32 +0000
Subject: [PATCH] meta-nvidia: bump NVIDIA stack to 595.58.03

Tested on RTX PRO 6000 Blackwell Server Edition (10de:2bb5) with TDX CC
mode ON: nvidia-smi reports driver 595.58.03 + CUDA 13.2, GPU initializes
cleanly without RmInitAdapter errors seen on 570.x.
---
 meta-dstack/conf/distro/dstack.conf           |  2 +-
 .../nvidia/libnvidia-nscq_595.58.03.bb        | 35 +++++++++++
 .../nvidia/nvidia-fabricmanager_595.58.03.bb  | 62 +++++++++++++++++++
 .../nvidia/nvidia_595.58.03.bb                | 25 ++++++++
 4 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 meta-nvidia/recipes-graphics/nvidia/libnvidia-nscq_595.58.03.bb
 create mode 100644 meta-nvidia/recipes-graphics/nvidia/nvidia-fabricmanager_595.58.03.bb
 create mode 100644 meta-nvidia/recipes-graphics/nvidia/nvidia_595.58.03.bb

diff --git a/meta-dstack/conf/distro/dstack.conf b/meta-dstack/conf/distro/dstack.conf
index 5da1983..83577c4 100644
--- a/meta-dstack/conf/distro/dstack.conf
+++ b/meta-dstack/conf/distro/dstack.conf
@@ -26,7 +26,7 @@ PREFERRED_VERSION_cargo-bin-cross-x86_64 = "1.92.0"
 
 # NVIDIA driver stack (only consulted when nvidia flavor is built).
 # Bump all three together — kernel module ABI is paired with userspace libs.
-NVIDIA_VERSION = "580.95.05"
+NVIDIA_VERSION = "595.58.03"
 PREFERRED_VERSION_nvidia = "${NVIDIA_VERSION}"
 PREFERRED_VERSION_nvidia-fabricmanager = "${NVIDIA_VERSION}"
 PREFERRED_VERSION_libnvidia-nscq = "${NVIDIA_VERSION}"
diff --git a/meta-nvidia/recipes-graphics/nvidia/libnvidia-nscq_595.58.03.bb b/meta-nvidia/recipes-graphics/nvidia/libnvidia-nscq_595.58.03.bb
new file mode 100644
index 0000000..1a72f6d
--- /dev/null
+++ b/meta-nvidia/recipes-graphics/nvidia/libnvidia-nscq_595.58.03.bb
@@ -0,0 +1,35 @@
+SUMMARY = "NVIDIA NSCQ library"
+DESCRIPTION = "NVIDIA NSCQ (NVIDIA System Communication Queue) library for NVIDIA GPU systems"
+HOMEPAGE = "https://developer.nvidia.com/"
+LICENSE = "NVIDIA-Proprietary"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=2cc00be68c1227a7c42ff3620ef75d05"
+
+# Same download host as the nvidia-fabricmanager recipe (.com rather than the .cn mirror).
+SRC_URI = "https://developer.download.nvidia.com/compute/nvidia-driver/redist/libnvidia_nscq/linux-x86_64/libnvidia_nscq-linux-x86_64-${PV}-archive.tar.xz"
+SRC_URI[md5sum] = "5b559a614f2c2c79b6eb796b68b6fb0f"
+SRC_URI[sha256sum] = "1c008787111fc55a7fc1d3521a6d41c2523b8949354e5651eadb4498554e5469"
+
+S = "${WORKDIR}/libnvidia_nscq-linux-x86_64-${PV}-archive"
+
+INSANE_SKIP:${PN} = "already-stripped ldflags"
+
+# Nothing is built here: do_install copies the library straight out of the archive.
+do_configure[noexec] = "1"
+do_compile[noexec] = "1"
+
+do_install() {
+    install -d ${D}${libdir}
+
+    # Real file first, then symlinks: .so -> .so.2 -> .so.2.0 -> versioned file
+    install -m 0755 ${S}/lib/libnvidia-nscq.so.${PV} ${D}${libdir}
+    ln -sf libnvidia-nscq.so.${PV} ${D}${libdir}/libnvidia-nscq.so.2.0
+    ln -sf libnvidia-nscq.so.2.0 ${D}${libdir}/libnvidia-nscq.so.2
+    ln -sf libnvidia-nscq.so.2 ${D}${libdir}/libnvidia-nscq.so
+}
+
+FILES:${PN} = "\
+    ${libdir}/libnvidia-nscq.so.${PV} \
+    ${libdir}/libnvidia-nscq.so.2.0 \
+    ${libdir}/libnvidia-nscq.so.2 \
+    ${libdir}/libnvidia-nscq.so \
+"
diff --git a/meta-nvidia/recipes-graphics/nvidia/nvidia-fabricmanager_595.58.03.bb b/meta-nvidia/recipes-graphics/nvidia/nvidia-fabricmanager_595.58.03.bb
new file mode 100644
index 0000000..90ac8f4
--- /dev/null
+++ b/meta-nvidia/recipes-graphics/nvidia/nvidia-fabricmanager_595.58.03.bb
@@ -0,0 +1,62 @@
+SUMMARY = "NVIDIA Fabric Manager for NVSwitch systems"
+DESCRIPTION = "NVIDIA Fabric Manager provides NVSwitch management for NVIDIA HGX and DGX systems"
+HOMEPAGE = "https://developer.nvidia.com/"
+LICENSE = "NVIDIA-Proprietary"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=2cc00be68c1227a7c42ff3620ef75d05"
+
+SRC_URI = "https://developer.download.nvidia.com/compute/nvidia-driver/redist/fabricmanager/linux-x86_64/fabricmanager-linux-x86_64-${PV}-archive.tar.xz"
+SRC_URI[md5sum] = "8761b4b5f371b7e87b2b82141c901311"
+SRC_URI[sha256sum] = "ea86951987ced2c2bbed1a9a9037e917dd2268add0ec484db9216e843d8b7de8"
+
+S = "${WORKDIR}/fabricmanager-linux-x86_64-${PV}-archive"
+
+DEPENDS = ""
+RDEPENDS:${PN} = "bash zlib"
+
+INSANE_SKIP:${PN} = "already-stripped ldflags"
+
+# Nothing is built here: do_install copies files straight out of the archive.
+do_configure[noexec] = "1"
+do_compile[noexec] = "1"
+
+inherit systemd
+
+SYSTEMD_AUTO_ENABLE = "enable"
+SYSTEMD_SERVICE:${PN} = "nvidia-fabricmanager.service"
+
+do_install() {
+    # Create directories
+    install -d ${D}${bindir}
+    install -d ${D}${libdir}
+    install -d ${D}${datadir}/nvidia/nvswitch
+    install -d ${D}${systemd_system_unitdir}
+
+    # Install binaries
+    install -m 0755 ${S}/bin/nv-fabricmanager ${D}${bindir}
+    install -m 0755 ${S}/bin/nvidia-fabricmanager-start.sh ${D}${bindir}
+    install -m 0755 ${S}/bin/nvswitch-audit ${D}${bindir}
+
+    # Install libraries
+    install -m 0644 ${S}/lib/libnvfm.so.1 ${D}${libdir}
+    ln -sf libnvfm.so.1 ${D}${libdir}/libnvfm.so
+
+    # Install config + topology files (glob picks up new SKUs in future archives)
+    install -m 0644 ${S}/etc/fabricmanager.cfg ${D}${datadir}/nvidia/nvswitch/
+    install -m 0644 ${S}/etc/fabricmanager_multinode.cfg ${D}${datadir}/nvidia/nvswitch/
+    for f in ${S}/share/nvidia/nvswitch/*; do
+        [ -f "$f" ] && install -m 0644 "$f" ${D}${datadir}/nvidia/nvswitch/
+    done
+
+    # Install systemd service
+    install -m 0644 ${S}/systemd/nvidia-fabricmanager.service ${D}${systemd_system_unitdir}
+}
+
+FILES:${PN} = "\
+    ${bindir}/nv-fabricmanager \
+    ${bindir}/nvidia-fabricmanager-start.sh \
+    ${bindir}/nvswitch-audit \
+    ${libdir}/libnvfm.so.1 \
+    ${libdir}/libnvfm.so \
+    ${datadir}/nvidia/nvswitch/* \
+    ${systemd_system_unitdir}/nvidia-fabricmanager.service \
+"
diff --git a/meta-nvidia/recipes-graphics/nvidia/nvidia_595.58.03.bb b/meta-nvidia/recipes-graphics/nvidia/nvidia_595.58.03.bb
new file mode 100644
index 0000000..8c0ab3d
--- /dev/null
+++ b/meta-nvidia/recipes-graphics/nvidia/nvidia_595.58.03.bb
@@ -0,0 +1,25 @@
+SUMMARY = "NVidia Graphics Driver"
+LICENSE = "NVIDIA-Proprietary"
+LIC_FILES_CHKSUM = "file://../LICENSE;md5=92aa2e2af6aa0bcba1c3fe49da021937"
+
+NVIDIA_ARCHIVE_NAME = "NVIDIA-Linux-${TARGET_ARCH}-${PV}"
+NVIDIA_SRC = "${WORKDIR}/${NVIDIA_ARCHIVE_NAME}"
+SRC_URI = " \
+    https://us.download.nvidia.com/tesla/${PV}/${NVIDIA_ARCHIVE_NAME}.run \
+"
+SRC_URI[md5sum] = "8d98a183bf994af0ff19980e0ef430f2"
+SRC_URI[sha256sum] = "8c0d4f967b7932c4ab5714272aee8103392b0a702c92afa555176d36205829f9"
+
+RDEPENDS:${PN} = "nvidia-modprobe-config"
+
+# Custom unpack: run the downloaded .run file with -x --target so its contents land in a freshly removed NVIDIA_SRC.
+do_unpack() {
+    chmod +x ${DL_DIR}/${NVIDIA_ARCHIVE_NAME}.run
+    rm -rf ${NVIDIA_SRC}
+    ${DL_DIR}/${NVIDIA_ARCHIVE_NAME}.run -x --target ${NVIDIA_SRC}
+}
+
+do_make_scripts[noexec] = "1"
+
+include nvidia-kernel-module.inc
+include nvidia-libs.inc