diff --git a/.envrc b/.envrc new file mode 100644 index 00000000..3550a30f --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.github/workflows/container_images.yml b/.github/workflows/container_images.yml index 4fe650bd..d3d089ea 100644 --- a/.github/workflows/container_images.yml +++ b/.github/workflows/container_images.yml @@ -31,16 +31,19 @@ jobs: arch: arm64 variance: - name: Ubuntu-24.04/CUDA-12.8.1 - image: "rust-gpu/rust-cuda-ubuntu24-cuda12" + image: "rust-cuda-ubuntu24-cuda12" dockerfile: ./container/ubuntu24-cuda12/Dockerfile - name: Ubuntu-24.04/CUDA-13.0.2 - image: "rust-gpu/rust-cuda-ubuntu24-cuda13" + image: "rust-cuda-ubuntu24-cuda13" dockerfile: ./container/ubuntu24-cuda13/Dockerfile + - name: Ubuntu-24.04/CUDA-13.2.1/LLVM-19.1.7 + image: "rust-cuda-ubuntu24-cuda13-llvm19" + dockerfile: ./container/ubuntu24-cuda13-llvm19/Dockerfile - name: RockyLinux-9/CUDA-12.8.1 - image: "rust-gpu/rust-cuda-rockylinux9-cuda12" + image: "rust-cuda-rockylinux9-cuda12" dockerfile: ./container/rockylinux9-cuda12/Dockerfile - name: RockyLinux-9/CUDA-13.0.2 - image: "rust-gpu/rust-cuda-rockylinux9-cuda13" + image: "rust-cuda-rockylinux9-cuda13" dockerfile: ./container/rockylinux9-cuda13/Dockerfile steps: - name: Free up space @@ -86,6 +89,8 @@ jobs: df -h - name: Checkout repository uses: actions/checkout@v4 + - name: Set lowercase repo owner + run: echo "REPO_OWNER=$(echo ${{ github.repository_owner }} | tr A-Z a-z)" >> $GITHUB_ENV - name: Validate platform run: | ARCH=$(uname -m) @@ -108,7 +113,7 @@ jobs: id: meta uses: docker/metadata-action@v5 with: - images: ${{ env.REGISTRY }}/${{ matrix.variance.image }} + images: ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ matrix.variance.image }} - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build and push by digest @@ -119,15 +124,12 @@ jobs: file: ${{ matrix.variance.dockerfile }} platforms: linux/${{ matrix.platform.arch }} labels: ${{ steps.meta.outputs.labels }} - outputs: type=image,name=${{ env.REGISTRY }}/${{ matrix.variance.image }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }} + outputs: type=image,name=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ matrix.variance.image }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }} cache-from: type=gha cache-to: type=gha,mode=max - name: Set artifact name if: github.event_name != 'pull_request' - run: | - ARTIFACT_NAME="${{ matrix.variance.image }}" - ARTIFACT_NAME="${ARTIFACT_NAME#*/}" # Remove everything before and including the slash - echo "ARTIFACT_NAME=$ARTIFACT_NAME" >> $GITHUB_ENV + run: echo "ARTIFACT_NAME=${{ matrix.variance.image }}" >> $GITHUB_ENV - name: Export digest if: github.event_name != 'pull_request' run: | @@ -158,19 +160,20 @@ jobs: matrix: variance: - name: Ubuntu-24.04/CUDA-12.8.1 - image: "rust-gpu/rust-cuda-ubuntu24-cuda12" + image: "rust-cuda-ubuntu24-cuda12" - name: Ubuntu-24.04/CUDA-13.0.2 - image: "rust-gpu/rust-cuda-ubuntu24-cuda13" + image: "rust-cuda-ubuntu24-cuda13" + - name: Ubuntu-24.04/CUDA-13.2.1/LLVM-19.1.7 + image: "rust-cuda-ubuntu24-cuda13-llvm19" - name: RockyLinux-9/CUDA-12.8.1 - image: "rust-gpu/rust-cuda-rockylinux9-cuda12" + image: "rust-cuda-rockylinux9-cuda12" - name: RockyLinux-9/CUDA-13.0.2 - image: "rust-gpu/rust-cuda-rockylinux9-cuda13" + image: "rust-cuda-rockylinux9-cuda13" steps: + - name: Set lowercase repo owner + run: echo "REPO_OWNER=$(echo ${{ github.repository_owner }} | tr A-Z a-z)" >> $GITHUB_ENV - name: Set artifact name - run: | - ARTIFACT_NAME="${{ matrix.variance.image }}" - ARTIFACT_NAME="${ARTIFACT_NAME#*/}" # Remove everything before and including the slash - echo "ARTIFACT_NAME=$ARTIFACT_NAME" >> $GITHUB_ENV + run: echo "ARTIFACT_NAME=${{ matrix.variance.image }}" >> $GITHUB_ENV - name: Download digests uses: actions/download-artifact@v4 with: @@ -183,7 +186,7 @@ jobs: id: meta uses: docker/metadata-action@v5 with: - images: ${{ env.REGISTRY }}/${{ matrix.variance.image }} + images: ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ matrix.variance.image }} tags: | type=ref,event=branch type=ref,event=pr @@ -202,7 +205,7 @@ jobs: working-directory: /tmp/digests run: | docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ - $(printf '${{ env.REGISTRY }}/${{ matrix.variance.image }}@sha256:%s ' *) + $(printf '${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ matrix.variance.image }}@sha256:%s ' *) - name: Inspect image run: | - docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ matrix.variance.image }}:${{ steps.meta.outputs.version }} + docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ matrix.variance.image }}:${{ steps.meta.outputs.version }} diff --git a/.gitignore b/.gitignore index 641bcefd..6afa29fe 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ book rustc-ice-*.txt .nix-driver-libs .claude +.direnv diff --git a/container/ubuntu24-cuda13-llvm19/Dockerfile b/container/ubuntu24-cuda13-llvm19/Dockerfile new file mode 100644 index 00000000..db4edc9e --- /dev/null +++ b/container/ubuntu24-cuda13-llvm19/Dockerfile @@ -0,0 +1,99 @@ +FROM nvcr.io/nvidia/cuda:13.2.1-cudnn-devel-ubuntu24.04 AS llvm-builder + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \ + build-essential \ + clang \ + curl \ + libffi-dev \ + libedit-dev \ + libncurses5-dev \ + libssl-dev \ + libtinfo-dev \ + libxml2-dev \ + cmake \ + ninja-build \ + pkg-config \ + python3 \ + xz-utils \ + zlib1g-dev && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /data/llvm19 + +# Download and build LLVM 19.1.7 (the active LLVM 19 pin used by `rustc_codegen_nvvm`). +# LLVM 8+ ships as a monorepo tarball; cmake source root is the `llvm/` subdir. +RUN curl -sSf -L -O https://github.com/llvm/llvm-project/releases/download/llvmorg-19.1.7/llvm-project-19.1.7.src.tar.xz && \ + tar -xf llvm-project-19.1.7.src.tar.xz && \ + cd llvm-project-19.1.7.src && \ + mkdir build && cd build && \ + ARCH=$(dpkg --print-architecture) && \ + if [ "$ARCH" = "amd64" ]; then \ + TARGETS="X86;NVPTX"; \ + else \ + TARGETS="AArch64;NVPTX"; \ + fi && \ + cmake -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_TARGETS_TO_BUILD="$TARGETS" \ + -DLLVM_BUILD_LLVM_DYLIB=ON \ + -DLLVM_LINK_LLVM_DYLIB=ON \ + -DLLVM_ENABLE_ASSERTIONS=OFF \ + -DLLVM_ENABLE_BINDINGS=OFF \ + -DLLVM_INCLUDE_EXAMPLES=OFF \ + -DLLVM_INCLUDE_TESTS=OFF \ + -DLLVM_INCLUDE_BENCHMARKS=OFF \ + -DLLVM_ENABLE_ZLIB=ON \ + -DLLVM_ENABLE_TERMINFO=ON \ + -DCMAKE_INSTALL_PREFIX=/opt/llvm-19 \ + ../llvm && \ + ninja -j$(nproc) && \ + ninja install && \ + cd ../.. && \ + rm -rf llvm-project-19.1.7.src* + +FROM nvcr.io/nvidia/cuda:13.2.1-cudnn-devel-ubuntu24.04 + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \ + build-essential \ + clang \ + curl \ + libssl-dev \ + libtinfo-dev \ + pkg-config \ + xz-utils \ + zlib1g-dev \ + cmake \ + libfontconfig-dev \ + libx11-xcb-dev \ + libxcursor-dev \ + libxi-dev \ + libxinerama-dev \ + libxrandr-dev && \ + rm -rf /var/lib/apt/lists/* + +COPY --from=llvm-builder /opt/llvm-19 /opt/llvm-19 +RUN ln -s /opt/llvm-19/bin/llvm-config /usr/bin/llvm-config && \ + ln -s /opt/llvm-19/bin/llvm-config /usr/bin/llvm-config-19 && \ + ln -s /opt/llvm-19/bin/llvm-as /usr/bin/llvm-as-19 + +# Get Rust (install rustup; toolchain installed from rust-toolchain.toml below) +RUN curl -sSf -L https://sh.rustup.rs | bash -s -- -y --profile minimal --default-toolchain none +ENV PATH="/root/.cargo/bin:${PATH}" + +# Setup the workspace +WORKDIR /data/rust-cuda +RUN --mount=type=bind,source=rust-toolchain.toml,target=/data/rust-cuda/rust-toolchain.toml \ + rustup show + +# Add nvvm + LLVM 19 dylib to the runtime linker path. +ENV LD_LIBRARY_PATH="/opt/llvm-19/lib:/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}" + +# `rustc_codegen_nvvm`'s build.rs probes `LLVM_CONFIG_19` to locate the LLVM 19 +# toolchain when the `llvm19` cargo feature is on. The feature itself is gated; +# downstream crates that depend on `cuda_builder` must build with +# `--features llvm19` for this to take effect — that propagates through to +# `nvvm/llvm19` (default `NvvmArch` = Blackwell) and `rustc_codegen_nvvm/llvm19` +# (LLVM 19 codegen path) per crates/cuda_builder/Cargo.toml. +ENV LLVM_CONFIG_19=/opt/llvm-19/bin/llvm-config +ENV LLVM_LINK_STATIC=1 +ENV RUST_LOG=info diff --git a/crates/cuda_std/src/warp.rs b/crates/cuda_std/src/warp.rs index 6f3e13ab..75da1d09 100644 --- a/crates/cuda_std/src/warp.rs +++ b/crates/cuda_std/src/warp.rs @@ -313,20 +313,20 @@ unsafe fn match_any_64(mask: u32, value: u64) -> u32 { #[inline(always)] unsafe fn match_all_32(mask: u32, value: u32) -> (u32, bool) { unsafe extern "C" { - #[allow(improper_ctypes)] - fn __nvvm_warp_match_all_32(mask: u32, value: u32) -> (u32, bool); + // see libintrinsics.ll — packs (value, predicate) into i64 + fn __nvvm_warp_match_all_32(mask: u32, value: u32) -> u64; } - unsafe { __nvvm_warp_match_all_32(mask, value) } + unpack_warp_result(unsafe { __nvvm_warp_match_all_32(mask, value) }) } #[gpu_only] #[inline(always)] unsafe fn match_all_64(mask: u32, value: u64) -> (u32, bool) { unsafe extern "C" { - #[allow(improper_ctypes)] - fn __nvvm_warp_match_all_64(mask: u32, value: u64) -> (u32, bool); + // see libintrinsics.ll — packs (value, predicate) into i64 + fn __nvvm_warp_match_all_64(mask: u32, value: u64) -> u64; } - unsafe { __nvvm_warp_match_all_64(mask, value) } + unpack_warp_result(unsafe { __nvvm_warp_match_all_64(mask, value) }) } /// Synchronizes a subset of threads in a warp then performs a reduce-and-broadcast @@ -741,14 +741,16 @@ pub enum WarpShuffleMode { Xor = 3, } -// C-compatible struct to match LLVM IR's {i32, i8} return type -// This fixes an ABI mismatch where Rust would represent (u32, bool) as [2 x i32] -// but the LLVM intrinsic returns {i32, i8} (a struct, not an array) -#[doc(hidden)] -#[repr(C)] -pub struct WarpShuffleResult { - value: u32, - predicate: u8, +// The libintrinsics.ll wrappers pack their (value, predicate) result into a +// single i64: low 32 bits = value, bit 32 = predicate. Returning a primitive +// integer avoids the small-aggregate ABI path where rustc attaches `align N` +// to the call's return value — an attribute LLVM 19's verifier rejects on +// non-pointer returns. +// Unused on host targets — every caller is `#[gpu_only]`. +#[allow(dead_code)] +#[inline(always)] +fn unpack_warp_result(packed: u64) -> (u32, bool) { + (packed as u32, (packed >> 32) & 1 != 0) } #[gpu_only] @@ -761,8 +763,7 @@ unsafe fn warp_shuffle_32( ) -> (u32, bool) { unsafe extern "C" { // see libintrinsics.ll - // Returns {i32, i8} in LLVM IR, which maps to our WarpShuffleResult struct - fn __nvvm_warp_shuffle(mask: u32, mode: u32, a: u32, b: u32, c: u32) -> WarpShuffleResult; + fn __nvvm_warp_shuffle(mask: u32, mode: u32, a: u32, b: u32, c: u32) -> u64; } assert!( @@ -776,7 +777,7 @@ unsafe fn warp_shuffle_32( c |= (32 - width) << 8; let result = unsafe { __nvvm_warp_shuffle(mask, mode as u32, value, b, c) }; - (result.value, result.predicate != 0) + unpack_warp_result(result) } unsafe fn warp_shuffle_128( diff --git a/crates/rustc_codegen_nvvm/build.rs b/crates/rustc_codegen_nvvm/build.rs index 475f684d..973c3499 100644 --- a/crates/rustc_codegen_nvvm/build.rs +++ b/crates/rustc_codegen_nvvm/build.rs @@ -10,11 +10,38 @@ use curl::easy::Easy; use tar::Archive; use xz::read::XzDecoder; +struct LlvmFlavor { + major: u8, + config_env: &'static str, + default_binary: &'static str, + probe_cuda_home: bool, + prebuilt_url: &'static str, +} + +const LLVM7: LlvmFlavor = LlvmFlavor { + major: 7, + config_env: "LLVM_CONFIG", + default_binary: "llvm-config", + probe_cuda_home: false, + prebuilt_url: PREBUILT_LLVM_URL_LLVM7, +}; + +const LLVM19: LlvmFlavor = LlvmFlavor { + major: 19, + config_env: "LLVM_CONFIG_19", + default_binary: "llvm-config-19", + probe_cuda_home: true, + prebuilt_url: PREBUILT_LLVM_URL_LLVM19, +}; + static PREBUILT_LLVM_URL_LLVM7: &str = - "https://github.com/rust-gpu/rustc_codegen_nvvm-llvm/releases/download/LLVM-7.1.0/"; + "https://github.com/rust-gpu/rustc_codegen_nvvm-llvm/releases/download/llvm-7.1.0/"; +static PREBUILT_LLVM_URL_LLVM19: &str = + "https://github.com/rust-gpu/rustc_codegen_nvvm-llvm/releases/download/llvm-19.1.7/"; fn main() { - rustc_llvm_build(llvm19_enabled()); + let flavor = if llvm19_enabled() { &LLVM19 } else { &LLVM7 }; + rustc_llvm_build(flavor); } fn fail(s: &str) -> ! { @@ -43,10 +70,6 @@ fn llvm19_enabled() -> bool { tracked_env_var_os("CARGO_FEATURE_LLVM19").is_some() } -fn required_major_llvm_version(llvm19_enabled: bool) -> u8 { - if llvm19_enabled { 19 } else { 7 } -} - fn command_version(path: &Path) -> Option { let output = Command::new(path).arg("--version").output().ok()?; if !output.status.success() { @@ -69,98 +92,37 @@ fn llvm_version_matches(path: &Path, required_major: u8) -> bool { } fn sibling_llvm_tool(llvm_config: &Path, tool_prefix: &str) -> Option { - let file_name = llvm_config.file_name()?.to_str()?; - let suffix = file_name.strip_prefix("llvm-config")?; - Some(llvm_config.with_file_name(format!("{tool_prefix}{suffix}"))) + // Ask llvm-config where its install tree lives rather than deriving lexically. + // Lexical derivation breaks when llvm-config is exposed via a single symlink + // into /usr/bin/ but the rest of the toolchain stays in the install prefix + // (e.g. /usr/bin/llvm-config -> /opt/llvm-7/bin/llvm-config, with /opt/llvm-7/bin + // off PATH). It also handles source-built toolchains where tool names are + // unsuffixed (`llvm-as`) versus apt-packaged ones (`llvm-as-19`). + let output = Command::new(llvm_config).arg("--bindir").output().ok()?; + if !output.status.success() { + return None; + } + let bindir = String::from_utf8(output.stdout).ok()?.trim().to_string(); + Some(PathBuf::from(bindir).join(tool_prefix)) } fn target_to_llvm_prebuilt(target: &str) -> String { let base = match target { "x86_64-pc-windows-msvc" => "windows-x86_64", - // NOTE(RDambrosio016): currently disabled because of weird issues with segfaults and building the C++ shim - // "x86_64-unknown-linux-gnu" => "linux-x86_64", + "x86_64-unknown-linux-gnu" => "linux-x86_64", + "aarch64-unknown-linux-gnu" => "linux-aarch64", _ => panic!( - "Unsupported target with no matching prebuilt LLVM: `{target}`, install LLVM and set LLVM_CONFIG" + "Unsupported target with no matching prebuilt LLVM: `{target}`, install LLVM and set LLVM_CONFIG (or LLVM_CONFIG_19 when the `llvm19` feature is enabled)" ), }; format!("{base}.tar.xz") } -fn find_llvm_config(target: &str, llvm19_enabled: bool) -> PathBuf { - if llvm19_enabled { - return find_llvm_config_llvm19(); - } - - find_llvm_config_llvm7(target) -} - -fn find_llvm_config_llvm19() -> PathBuf { - let required_major = required_major_llvm_version(true); - let mut candidates = Vec::new(); - - if let Some(path) = tracked_env_var_os("LLVM_CONFIG_19") { - candidates.push(PathBuf::from(path)); - } - - candidates.push(PathBuf::from("llvm-config-19")); - - if let Some(cuda_home) = tracked_env_var_os("CUDA_HOME") { - let cuda_home = PathBuf::from(cuda_home); - candidates.push(cuda_home.join("nvvm").join("bin").join("llvm-config")); - candidates.push(cuda_home.join("bin").join("llvm-config")); - } - - for candidate in &candidates { - if llvm_version_matches(candidate, required_major) { - return candidate.clone(); - } - } - - let tried = candidates - .iter() - .map(|candidate| format!(" - {}", candidate.display())) - .collect::>() - .join("\n"); - - fail(&format!( - "LLVM 19 support is enabled, but no LLVM 19 toolchain was found.\n\ - Tried:\n{tried}\n\n\ - Set LLVM_CONFIG_19=/path/to/llvm-config from an LLVM 19 installation." - )); -} - -fn find_llvm_config_llvm7(target: &str) -> PathBuf { - let required_major = required_major_llvm_version(false); - // first, if LLVM_CONFIG is set then see if its llvm version if 7.x, if so, use that. - let config_env = tracked_env_var_os("LLVM_CONFIG"); - // if LLVM_CONFIG is not set, try using llvm-config as a normal app in PATH. - let path_to_try = config_env.unwrap_or_else(|| "llvm-config".into()); - - // if USE_PREBUILT_LLVM is set to 1 then download prebuilt llvm without trying llvm-config - if tracked_env_var_os("USE_PREBUILT_LLVM") != Some("1".into()) { - let cmd = Command::new(&path_to_try).arg("--version").output(); - - if let Ok(out) = cmd { - let version = String::from_utf8(out.stdout).unwrap(); - if version.starts_with(&required_major.to_string()) { - return PathBuf::from(path_to_try); - } - println!( - "cargo:warning=Prebuilt llvm-config version does not start with {required_major}" - ); - } else { - println!("cargo:warning=Failed to run prebuilt llvm-config"); - } - } - - // otherwise, download prebuilt LLVM. - println!("cargo:warning=Downloading prebuilt LLVM"); - let mut url = tracked_env_var_os("PREBUILT_LLVM_URL") - .map(|x| x.to_string_lossy().to_string()) - .unwrap_or_else(|| PREBUILT_LLVM_URL_LLVM7.to_string()); - +fn download_prebuilt_llvm(target: &str, base_url: &str) -> PathBuf { let prebuilt_name = target_to_llvm_prebuilt(target); - url = format!("{url}{prebuilt_name}"); + let url = format!("{base_url}{prebuilt_name}"); + + println!("cargo:warning=Downloading prebuilt LLVM from {url}"); let out = env::var("OUT_DIR").expect("OUT_DIR was not set"); let mut easy = Easy::new(); @@ -181,6 +143,13 @@ fn find_llvm_config_llvm7(target: &str) -> PathBuf { .expect("Failed to download prebuilt LLVM"); } + let response_code = easy.response_code().unwrap(); + if response_code != 200 { + fail(&format!( + "Failed to download prebuilt LLVM from {url}. HTTP response code: {response_code}" + )); + } + let decompressor = XzDecoder::new(xz_encoded.as_slice()); let mut ar = Archive::new(decompressor); @@ -194,19 +163,61 @@ fn find_llvm_config_llvm7(target: &str) -> PathBuf { .join(format!("llvm-config{}", std::env::consts::EXE_SUFFIX)) } -fn find_llvm_as_llvm19(llvm_config: &Path) -> PathBuf { - let required_major = required_major_llvm_version(true); +fn find_llvm_config(target: &str, flavor: &LlvmFlavor) -> PathBuf { + // USE_PREBUILT_LLVM=1 skips local probing and goes straight to download. + if tracked_env_var_os("USE_PREBUILT_LLVM") != Some("1".into()) { + let mut candidates = Vec::new(); + + if let Some(path) = tracked_env_var_os(flavor.config_env) { + candidates.push(PathBuf::from(path)); + } + + candidates.push(PathBuf::from(flavor.default_binary)); + + if flavor.probe_cuda_home + && let Some(cuda_home) = tracked_env_var_os("CUDA_HOME") + { + let cuda_home = PathBuf::from(cuda_home); + candidates.push(cuda_home.join("nvvm").join("bin").join("llvm-config")); + candidates.push(cuda_home.join("bin").join("llvm-config")); + } + + for candidate in &candidates { + if llvm_version_matches(candidate, flavor.major) { + return candidate.clone(); + } + } + + let tried = candidates + .iter() + .map(|candidate| format!(" - {}", candidate.display())) + .collect::>() + .join("\n"); + + println!( + "cargo:warning=No matching LLVM {} toolchain found, falling back to prebuilt LLVM. Tried:\n{}", + flavor.major, tried + ); + } + + let url = tracked_env_var_os("PREBUILT_LLVM_URL") + .map(|x| x.to_string_lossy().to_string()) + .unwrap_or_else(|| flavor.prebuilt_url.to_string()); + download_prebuilt_llvm(target, &url) +} + +fn find_llvm_as(llvm_config: &Path, flavor: &LlvmFlavor) -> PathBuf { let mut candidates = Vec::new(); if let Some(path) = sibling_llvm_tool(llvm_config, "llvm-as") { candidates.push(path); } - candidates.push(PathBuf::from("llvm-as-19")); + candidates.push(PathBuf::from(format!("llvm-as-{}", flavor.major))); candidates.push(PathBuf::from("llvm-as")); for candidate in &candidates { - if llvm_version_matches(candidate, required_major) { + if llvm_version_matches(candidate, flavor.major) { return candidate.clone(); } } @@ -218,8 +229,9 @@ fn find_llvm_as_llvm19(llvm_config: &Path) -> PathBuf { .join("\n"); fail(&format!( - "LLVM 19 support is enabled, but llvm-as 19 was not found.\n\ - Tried:\n{tried}" + "LLVM {} support is enabled, but llvm-as {} was not found.\n\ + Tried:\n{tried}", + flavor.major, flavor.major )); } @@ -238,60 +250,49 @@ pub fn tracked_env_var_os + Display>(key: K) -> Option env::var_os(key) } -fn configure_libintrinsics(llvm_config: &Path, llvm19_enabled: bool) { +fn configure_libintrinsics(llvm_config: &Path, flavor: &LlvmFlavor) { let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR was not set")); - // Both paths share `libintrinsics.ll`. The LLVM 7 build consumes the checked-in - // `libintrinsics.bc` (regenerate manually with `llvm-as-7` when the .ll changes). - // The LLVM 19 build assembles the same .ll on the fly with `llvm-as-19`. build_helper::rerun_if_changed(Path::new("libintrinsics.ll")); - if llvm19_enabled { - let input = manifest_dir.join("libintrinsics.ll"); - let output = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR was not set")) - .join("libintrinsics_v19.bc"); - let llvm_as = find_llvm_as_llvm19(llvm_config); - - let status = Command::new(&llvm_as) - .arg(&input) - .arg("-o") - .arg(&output) - .stderr(Stdio::inherit()) - .stdout(Stdio::inherit()) - .status() - .unwrap_or_else(|err| { - fail(&format!( - "failed to execute llvm-as for LLVM 19: {llvm_as:?}\nerror: {err}" - )) - }); - - if !status.success() { + let input = manifest_dir.join("libintrinsics.ll"); + let output = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR was not set")) + .join(format!("libintrinsics_v{}.bc", flavor.major)); + let llvm_as = find_llvm_as(llvm_config, flavor); + + let status = Command::new(&llvm_as) + .arg(&input) + .arg("-o") + .arg(&output) + .stderr(Stdio::inherit()) + .stdout(Stdio::inherit()) + .status() + .unwrap_or_else(|err| { fail(&format!( - "llvm-as did not assemble {} successfully", - input.display() - )); - } - - println!( - "cargo:rustc-env=NVVM_LIBINTRINSICS_BC_PATH={}", - output.display() - ); - } else { - build_helper::rerun_if_changed(Path::new("libintrinsics.bc")); - println!( - "cargo:rustc-env=NVVM_LIBINTRINSICS_BC_PATH={}", - manifest_dir.join("libintrinsics.bc").display() - ); + "failed to execute llvm-as for LLVM {}: {llvm_as:?}\nerror: {err}", + flavor.major + )) + }); + + if !status.success() { + fail(&format!( + "llvm-as did not assemble {} successfully", + input.display() + )); } + + println!( + "cargo:rustc-env=NVVM_LIBINTRINSICS_BC_PATH={}", + output.display() + ); } -fn rustc_llvm_build(llvm19_enabled: bool) { +fn rustc_llvm_build(flavor: &LlvmFlavor) { let target = env::var("TARGET").expect("TARGET was not set"); - let llvm_config = find_llvm_config(&target, llvm19_enabled); - let required_major = required_major_llvm_version(llvm19_enabled); + let llvm_config = find_llvm_config(&target, flavor); - configure_libintrinsics(&llvm_config, llvm19_enabled); + configure_libintrinsics(&llvm_config, flavor); let required_components = &["ipo", "bitreader", "bitwriter", "lto", "nvptx"]; @@ -320,6 +321,12 @@ fn rustc_llvm_build(llvm19_enabled: bool) { if flag.starts_with("-flto") { continue; } + + // if we are on msvc, ignore all -W flags as msvc uses /W and -W is invalid. + if target.contains("msvc") && flag.starts_with("-W") { + continue; + } + // ignore flags that aren't supported in gcc 8 if flag == "-Wcovered-switch-default" { continue; @@ -340,7 +347,7 @@ fn rustc_llvm_build(llvm19_enabled: bool) { cfg.define(&flag, None); } - let llvm_version_major = required_major.to_string(); + let llvm_version_major = flavor.major.to_string(); cfg.define("LLVM_VERSION_MAJOR", Some(llvm_version_major.as_str())); if tracked_env_var_os("LLVM_RUSTLLVM").is_some() { diff --git a/crates/rustc_codegen_nvvm/libintrinsics.bc b/crates/rustc_codegen_nvvm/libintrinsics.bc deleted file mode 100644 index 28693fb4..00000000 Binary files a/crates/rustc_codegen_nvvm/libintrinsics.bc and /dev/null differ diff --git a/crates/rustc_codegen_nvvm/libintrinsics.ll b/crates/rustc_codegen_nvvm/libintrinsics.ll index 6ddb53db..ef60b884 100644 --- a/crates/rustc_codegen_nvvm/libintrinsics.ll +++ b/crates/rustc_codegen_nvvm/libintrinsics.ll @@ -152,44 +152,52 @@ start: } declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) #0 -; Required because we need to explicitly generate { i32, i1 } for the following intrinsics -; except rustc will not generate them (it will make { i32, i8 }) which libnvvm rejects. - -define { i32, i8 } @__nvvm_warp_shuffle(i32, i32, i32, i32, i32) #1 { +; NVVM intrinsics return { i32, i1 }, but rustc lowering of (u32, bool) — or any +; small two-field aggregate — produces { i32, i8 }, which libnvvm rejects. We +; used to bridge by re-packing into { i32, i8 } here, but that aggregate return +; causes rustc's call-site ABI to attach `align N` to the return value, which +; LLVM 19's verifier rejects (align is only valid on pointer returns). So we +; pack into a plain i64 instead: low 32 bits = value, bit 32 = predicate. +; Primitive integer return ⇒ no struct ABI ⇒ no spurious return-attribute. + +define i64 @__nvvm_warp_shuffle(i32, i32, i32, i32, i32) #1 { start: - %5 = call { i32, i1 } @llvm.nvvm.shfl.sync.i32(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) - %6 = extractvalue { i32, i1 } %5, 1 - %7 = zext i1 %6 to i8 - %8 = extractvalue { i32, i1 } %5, 0 - %9 = insertvalue { i32, i8 } undef, i32 %8, 0 - %10 = insertvalue { i32, i8 } %9, i8 %7, 1 - ret { i32, i8 } %10 + %r = call { i32, i1 } @llvm.nvvm.shfl.sync.i32(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) + %val = extractvalue { i32, i1 } %r, 0 + %pred = extractvalue { i32, i1 } %r, 1 + %val64 = zext i32 %val to i64 + %pred64 = zext i1 %pred to i64 + %pred_hi = shl i64 %pred64, 32 + %packed = or i64 %val64, %pred_hi + ret i64 %packed } declare { i32, i1 } @llvm.nvvm.shfl.sync.i32(i32, i32, i32, i32, i32) #1 -define { i32, i8 } @__nvvm_warp_match_all_32(i32, i32) { +define i64 @__nvvm_warp_match_all_32(i32, i32) { start: - %2 = call { i32, i1 } @llvm.nvvm.match.all.sync.i32(i32 %0, i32 %1) - %3 = extractvalue { i32, i1 } %2, 1 - %4 = zext i1 %3 to i8 - %5 = extractvalue { i32, i1 } %2, 0 - %6 = insertvalue { i32, i8 } undef, i32 %5, 0 - %7 = insertvalue { i32, i8 } %6, i8 %4, 1 - ret { i32, i8 } %7 + %r = call { i32, i1 } @llvm.nvvm.match.all.sync.i32(i32 %0, i32 %1) + %val = extractvalue { i32, i1 } %r, 0 + %pred = extractvalue { i32, i1 } %r, 1 + %val64 = zext i32 %val to i64 + %pred64 = zext i1 %pred to i64 + %pred_hi = shl i64 %pred64, 32 + %packed = or i64 %val64, %pred_hi + ret i64 %packed } declare { i32, i1 } @llvm.nvvm.match.all.sync.i32(i32, i32) #1 -define { i32, i8 } @__nvvm_warp_match_all_64(i32, i64) { +define i64 @__nvvm_warp_match_all_64(i32, i64) { start: - %2 = call { i32, i1 } @llvm.nvvm.match.all.sync.i64(i32 %0, i64 %1) - %3 = extractvalue { i32, i1 } %2, 1 - %4 = zext i1 %3 to i8 - %5 = extractvalue { i32, i1 } %2, 0 - %6 = insertvalue { i32, i8 } undef, i32 %5, 0 - %7 = insertvalue { i32, i8 } %6, i8 %4, 1 - ret { i32, i8 } %7 + %r = call { i32, i1 } @llvm.nvvm.match.all.sync.i64(i32 %0, i64 %1) + %val = extractvalue { i32, i1 } %r, 0 + %pred = extractvalue { i32, i1 } %r, 1 + %val64 = zext i32 %val to i64 + %pred64 = zext i1 %pred to i64 + %pred_hi = shl i64 %pred64, 32 + %packed = or i64 %val64, %pred_hi + ret i64 %packed } declare { i32, i1 } @llvm.nvvm.match.all.sync.i64(i32, i64) #1 diff --git a/examples/vecadd/Cargo.toml b/examples/vecadd/Cargo.toml index 4de99985..8686504a 100644 --- a/examples/vecadd/Cargo.toml +++ b/examples/vecadd/Cargo.toml @@ -3,6 +3,13 @@ name = "vecadd" version = "0.1.0" edition = "2024" +[features] +default = [] +# Build the kernels crate with the LLVM 19 backend. Forwards to `cuda_builder`, +# which propagates the feature to `nvvm` (default arch -> Compute100 / Blackwell) +# and to its nested `cargo build -p rustc_codegen_nvvm`. +llvm19 = ["cuda_builder/llvm19"] + [dependencies] cust = { path = "../../crates/cust" } cust_raw = { path = "../../crates/cust_raw", default-features = false, features = ["driver"] } diff --git a/flake.nix b/flake.nix index 2bbf0563..c5a0f761 100644 --- a/flake.nix +++ b/flake.nix @@ -10,167 +10,155 @@ outputs = { nixpkgs, nixpkgs-llvm7, rust-overlay, ... }: let - system = "x86_64-linux"; - # allowUnfree is required because CUDA is unfree. - pkgs = import nixpkgs { - inherit system; - config.allowUnfree = true; - overlays = [ rust-overlay.overlays.default ]; - }; - pkgsLlvm7 = import nixpkgs-llvm7 { inherit system; }; - lib = pkgs.lib; + systems = [ "aarch64-linux" "x86_64-linux" ]; + forAllSystems = nixpkgs.lib.genAttrs systems; - # ---- CUDA toolkit (Nix-managed) ---- - # The NVIDIA **driver** (libcuda.so.1, libnvidia-*) still comes from the - # host — apt on Debian, hardware.nvidia on NixOS. Nix only provides the - # **toolkit** (nvcc, libnvvm, cudart, headers). - # - # Toolkit pin chooses what PTX version NVVM emits, which then dictates - # the minimum host driver version at runtime: - # CUDA 13.2 → NVVM 22.0 → PTX 9.2 → needs driver 580.x+ (CUDA 13) - # CUDA 12.9 → NVVM 21.x → PTX 8.x → runs on CUDA 12.x drivers - # `cudatoolkit` is the kitchen-sink symlinkJoin maintained by nixpkgs — - # every header path and lib layout is already wired correctly. - cuda19Root = pkgs.cudaPackages_13_2.cudatoolkit; - cuda7Root = pkgs.cudaPackages_12_9.cudatoolkit; + mkShells = system: + let + # allowUnfree is required because CUDA is unfree. + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; + overlays = [ rust-overlay.overlays.default ]; + }; + pkgsLlvm7 = import nixpkgs-llvm7 { inherit system; }; + lib = pkgs.lib; - driverLibDir = "/usr/lib/x86_64-linux-gnu"; + # ---- CUDA toolkit (Nix-managed) ---- + # Toolkit pin chooses what PTX version NVVM emits, which then dictates + # the minimum host driver version at runtime: + # CUDA 13.2 → NVVM 22.0 → PTX 9.2 → needs driver 580.x+ (CUDA 13) + # CUDA 12.9 → NVVM 21.x → PTX 8.x → runs on CUDA 12.x drivers + # `cudatoolkit` is the kitchen-sink symlinkJoin maintained by nixpkgs — + # every header path and lib layout is already wired correctly. The host + # NVIDIA driver (libcuda.so.1) is needed at runtime; it is *not* shimmed + # in here — supply it via the system or extend LD_LIBRARY_PATH yourself + # before running CUDA programs. + cuda19Root = pkgs.cudaPackages_13_2.cudatoolkit; + cuda7Root = pkgs.cudaPackages_12_9.cudatoolkit; - toolchain = pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml; + toolchain = pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml; - # ---- LLVM 19 (from current nixpkgs) ---- - llvm19 = pkgs.llvmPackages_19; - llvm19Bin = lib.getBin llvm19.llvm; - llvm19Dev = lib.getDev llvm19.llvm; - llvm19CompatTools = pkgs.symlinkJoin { - name = "llvm19-compat-tools"; - paths = [ - (pkgs.writeShellScriptBin "opt-19" ''exec ${llvm19Bin}/bin/opt "$@"'') - (pkgs.writeShellScriptBin "llvm-as-19" ''exec ${llvm19Bin}/bin/llvm-as "$@"'') - (pkgs.writeShellScriptBin "llvm-dis-19" ''exec ${llvm19Bin}/bin/llvm-dis "$@"'') - (pkgs.writeShellScriptBin "llc-19" ''exec ${llvm19Bin}/bin/llc "$@"'') - ]; - }; + # ---- LLVM 19 (from current nixpkgs) ---- + llvm19 = pkgs.llvmPackages_19; + llvm19Bin = lib.getBin llvm19.llvm; + llvm19Dev = lib.getDev llvm19.llvm; + llvm19CompatTools = pkgs.symlinkJoin { + name = "llvm19-compat-tools"; + paths = [ + (pkgs.writeShellScriptBin "opt-19" ''exec ${llvm19Bin}/bin/opt "$@"'') + (pkgs.writeShellScriptBin "llvm-as-19" ''exec ${llvm19Bin}/bin/llvm-as "$@"'') + (pkgs.writeShellScriptBin "llvm-dis-19" ''exec ${llvm19Bin}/bin/llvm-dis "$@"'') + (pkgs.writeShellScriptBin "llc-19" ''exec ${llvm19Bin}/bin/llc "$@"'') + ]; + }; - # ---- LLVM 7.1.0 (from pinned nixos-23.05 nixpkgs) ---- - llvm7Pkg = pkgsLlvm7.llvmPackages_7.llvm; - llvm7Bin = pkgsLlvm7.lib.getBin llvm7Pkg; - llvm7Dev = pkgsLlvm7.lib.getDev llvm7Pkg; - llvm7CompatTools = pkgs.symlinkJoin { - name = "llvm7-compat-tools"; - paths = [ - (pkgs.writeShellScriptBin "llvm-config-7" ''exec ${llvm7Dev}/bin/llvm-config "$@"'') - (pkgs.writeShellScriptBin "llvm-as-7" ''exec ${llvm7Bin}/bin/llvm-as "$@"'') - (pkgs.writeShellScriptBin "llvm-dis-7" ''exec ${llvm7Bin}/bin/llvm-dis "$@"'') - (pkgs.writeShellScriptBin "llc-7" ''exec ${llvm7Bin}/bin/llc "$@"'') - (pkgs.writeShellScriptBin "opt-7" ''exec ${llvm7Bin}/bin/opt "$@"'') - ]; - }; + # ---- LLVM 7.1.0 (from pinned nixos-23.05 nixpkgs) ---- + llvm7Pkg = pkgsLlvm7.llvmPackages_7.llvm; + llvm7Bin = pkgsLlvm7.lib.getBin llvm7Pkg; + llvm7Dev = pkgsLlvm7.lib.getDev llvm7Pkg; + llvm7CompatTools = pkgs.symlinkJoin { + name = "llvm7-compat-tools"; + paths = [ + (pkgs.writeShellScriptBin "llvm-config-7" ''exec ${llvm7Dev}/bin/llvm-config "$@"'') + (pkgs.writeShellScriptBin "llvm-as-7" ''exec ${llvm7Bin}/bin/llvm-as "$@"'') + (pkgs.writeShellScriptBin "llvm-dis-7" ''exec ${llvm7Bin}/bin/llvm-dis "$@"'') + (pkgs.writeShellScriptBin "llc-7" ''exec ${llvm7Bin}/bin/llc "$@"'') + (pkgs.writeShellScriptBin "opt-7" ''exec ${llvm7Bin}/bin/opt "$@"'') + ]; + }; - # ---- Shared bits across both shells ---- - commonNativeInputs = [ - toolchain - pkgs.pkg-config - pkgs.cmake - pkgs.ninja - ]; - # The v19 shell uses unstable's runtime libs (modern glibc). The v7 shell has - # to match LLVM 7's glibc generation (23.05), otherwise ncurses/libstdc++ from - # unstable demand GLIBC_2.38+ symbols LLVM 7's linked glibc 2.37 doesn't have. - v19BuildInputs = [ - pkgs.openssl - pkgs.libxml2 - pkgs.zlib - pkgs.ncurses - pkgs.stdenv.cc.cc.lib - ]; - v7BuildInputs = [ - pkgsLlvm7.openssl - pkgsLlvm7.libxml2 - pkgsLlvm7.zlib - pkgsLlvm7.ncurses - pkgsLlvm7.stdenv.cc.cc.lib - ]; - mkCudaEnv = root: { - CUDA_HOME = "${root}"; - CUDA_ROOT = "${root}"; - CUDA_PATH = "${root}"; - CUDA_TOOLKIT_ROOT_DIR = "${root}"; - # Cover both lib/ (nix-style) and lib64/ (FHS-style) so downstream - # build.rs scripts that probe either layout resolve libcudart + stubs. - CUDA_LIBRARY_PATH = - "${root}/lib:${root}/lib64:${root}/lib/stubs:${root}/lib64/stubs"; - }; - # Symlink every NVIDIA-shipped driver library (libcuda, libnvidia-*) into a - # single shim dir that we then stick on LD_LIBRARY_PATH. libcuda alone is not - # enough: the driver will dlopen companions like libnvidia-ptxjitcompiler.so.1 - # when JITing PTX, and failing to find them surfaces as - # CUDA_ERROR_JIT_COMPILER_NOT_FOUND from cuModuleLoadDataEx. - driverShimHook = '' - driver_shim_dir="$PWD/.nix-driver-libs" - mkdir -p "$driver_shim_dir" - for src in "${driverLibDir}"/libcuda.so* "${driverLibDir}"/libnvidia-*.so*; do - [ -e "$src" ] || continue - ln -sf "$src" "$driver_shim_dir/$(basename "$src")" - done - ''; + # ---- Shared bits across both shells ---- + commonNativeInputs = [ + toolchain + pkgs.pkg-config + pkgs.cmake + pkgs.ninja + ]; + # The v19 shell uses unstable's runtime libs (modern glibc). The v7 shell has + # to match LLVM 7's glibc generation (23.05), otherwise ncurses/libstdc++ from + # unstable demand GLIBC_2.38+ symbols LLVM 7's linked glibc 2.37 doesn't have. + v19BuildInputs = [ + pkgs.openssl + pkgs.libxml2 + pkgs.zlib + pkgs.ncurses + pkgs.stdenv.cc.cc.lib + ]; + v7BuildInputs = [ + pkgsLlvm7.openssl + pkgsLlvm7.libxml2 + pkgsLlvm7.zlib + pkgsLlvm7.ncurses + pkgsLlvm7.stdenv.cc.cc.lib + ]; + mkCudaEnv = root: { + CUDA_HOME = "${root}"; + CUDA_ROOT = "${root}"; + CUDA_PATH = "${root}"; + CUDA_TOOLKIT_ROOT_DIR = "${root}"; + # Cover both lib/ (nix-style) and lib64/ (FHS-style) so downstream + # build.rs scripts that probe either layout resolve libcudart + stubs. + CUDA_LIBRARY_PATH = + "${root}/lib:${root}/lib64:${root}/lib/stubs:${root}/lib64/stubs"; + }; - # ---- LLVM 7-only shell (CUDA 12.9 toolkit) ---- - v7Shell = pkgs.mkShell ((mkCudaEnv cuda7Root) // { - nativeBuildInputs = commonNativeInputs ++ [ - cuda7Root - llvm7Bin - llvm7Dev - llvm7CompatTools - pkgsLlvm7.llvmPackages_7.clang - pkgsLlvm7.llvmPackages_7.libclang - ]; - buildInputs = v7BuildInputs; - LLVM_CONFIG = "${llvm7Dev}/bin/llvm-config"; - # Give bindgen an explicit libclang (matched to 23.05's glibc) so it doesn't - # fall back to scanning system paths and pick up an apt-installed LLVM 19 - # with deps the v7 shell's LD_LIBRARY_PATH doesn't satisfy. - LIBCLANG_PATH = "${pkgsLlvm7.lib.getLib pkgsLlvm7.llvmPackages_7.libclang}/lib"; - shellHook = driverShimHook + '' - export PATH="${llvm7CompatTools}/bin:${llvm7Bin}/bin:${llvm7Dev}/bin:${cuda7Root}/bin:${cuda7Root}/nvvm/bin:$PATH" - export LD_LIBRARY_PATH="$driver_shim_dir:${cuda7Root}/nvvm/lib:${cuda7Root}/nvvm/lib64:${cuda7Root}/lib64:${cuda7Root}/lib:${pkgsLlvm7.ncurses.out}/lib:${pkgsLlvm7.libxml2.out}/lib:${pkgsLlvm7.zlib.out}/lib:${pkgsLlvm7.stdenv.cc.cc.lib}/lib''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" + # ---- LLVM 7-only shell (CUDA 12.9 toolkit) ---- + v7Shell = pkgs.mkShell ((mkCudaEnv cuda7Root) // { + nativeBuildInputs = commonNativeInputs ++ [ + cuda7Root + llvm7Bin + llvm7Dev + llvm7CompatTools + pkgsLlvm7.llvmPackages_7.clang + pkgsLlvm7.llvmPackages_7.libclang + ]; + buildInputs = v7BuildInputs; + LLVM_CONFIG = "${llvm7Dev}/bin/llvm-config"; + # Give bindgen an explicit libclang (matched to 23.05's glibc) so it doesn't + # fall back to scanning system paths and pick up an apt-installed LLVM 19 + # with deps the v7 shell's LD_LIBRARY_PATH doesn't satisfy. + LIBCLANG_PATH = "${pkgsLlvm7.lib.getLib pkgsLlvm7.llvmPackages_7.libclang}/lib"; + shellHook = '' + export PATH="${llvm7CompatTools}/bin:${llvm7Bin}/bin:${llvm7Dev}/bin:${cuda7Root}/bin:${cuda7Root}/nvvm/bin:$PATH" + export LD_LIBRARY_PATH="${cuda7Root}/nvvm/lib:${cuda7Root}/nvvm/lib64:${cuda7Root}/lib64:${cuda7Root}/lib:${pkgsLlvm7.ncurses.out}/lib:${pkgsLlvm7.libxml2.out}/lib:${pkgsLlvm7.zlib.out}/lib:${pkgsLlvm7.stdenv.cc.cc.lib}/lib''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" - echo "rust-cuda llvm7 shell" - echo " CUDA_HOME=$CUDA_HOME" - echo " LLVM_CONFIG=$LLVM_CONFIG" - echo " NVIDIA_DRIVER_LIB=$driver_shim_dir/libcuda.so.1" - ''; - }); + echo "rust-cuda llvm7 shell (${system})" + echo " CUDA_HOME=$CUDA_HOME" + echo " LLVM_CONFIG=$LLVM_CONFIG" + ''; + }); - # ---- LLVM 19-only shell (CUDA 13.2 toolkit, the active-work shell) ---- - v19Shell = pkgs.mkShell ((mkCudaEnv cuda19Root) // { - nativeBuildInputs = commonNativeInputs ++ [ - cuda19Root - llvm19.clang - llvm19.libclang - llvm19Bin - llvm19Dev - llvm19CompatTools - ]; - buildInputs = v19BuildInputs; - LLVM_CONFIG_19 = "${llvm19Dev}/bin/llvm-config"; - LIBCLANG_PATH = "${lib.getLib llvm19.libclang}/lib"; - shellHook = driverShimHook + '' - export PATH="${llvm19CompatTools}/bin:${llvm19Bin}/bin:${llvm19Dev}/bin:${cuda19Root}/bin:${cuda19Root}/nvvm/bin:$PATH" - export LD_LIBRARY_PATH="$driver_shim_dir:${cuda19Root}/nvvm/lib:${cuda19Root}/nvvm/lib64:${cuda19Root}/lib64:${cuda19Root}/lib:${pkgs.ncurses.out}/lib:${pkgs.libxml2.out}/lib:${pkgs.zlib.out}/lib:${pkgs.stdenv.cc.cc.lib}/lib''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" + # ---- LLVM 19-only shell (CUDA 13.2 toolkit, the active-work shell) ---- + v19Shell = pkgs.mkShell ((mkCudaEnv cuda19Root) // { + nativeBuildInputs = commonNativeInputs ++ [ + cuda19Root + llvm19.clang + llvm19.libclang + llvm19Bin + llvm19Dev + llvm19CompatTools + ]; + buildInputs = v19BuildInputs; + LLVM_CONFIG_19 = "${llvm19Dev}/bin/llvm-config"; + LIBCLANG_PATH = "${lib.getLib llvm19.libclang}/lib"; + shellHook = '' + export PATH="${llvm19CompatTools}/bin:${llvm19Bin}/bin:${llvm19Dev}/bin:${cuda19Root}/bin:${cuda19Root}/nvvm/bin:$PATH" + export LD_LIBRARY_PATH="${cuda19Root}/nvvm/lib:${cuda19Root}/nvvm/lib64:${cuda19Root}/lib64:${cuda19Root}/lib:${pkgs.ncurses.out}/lib:${pkgs.libxml2.out}/lib:${pkgs.zlib.out}/lib:${pkgs.stdenv.cc.cc.lib}/lib''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" - echo "rust-cuda llvm19 shell" - echo " CUDA_HOME=$CUDA_HOME" - echo " LLVM_CONFIG_19=$LLVM_CONFIG_19" - echo " NVIDIA_DRIVER_LIB=$driver_shim_dir/libcuda.so.1" - ''; - }); + echo "rust-cuda llvm19 shell (${system})" + echo " CUDA_HOME=$CUDA_HOME" + echo " LLVM_CONFIG_19=$LLVM_CONFIG_19" + ''; + }); + in + { + default = v19Shell; + v7 = v7Shell; + v19 = v19Shell; + }; in { - devShells.${system} = { - default = v19Shell; - v7 = v7Shell; - v19 = v19Shell; - }; + devShells = forAllSystems mkShells; }; }