From 57752cb3f8189f7f419e118ea68dc9a6abef334c Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Wed, 10 Jun 2026 17:25:12 +0000 Subject: [PATCH 01/13] checkpoint first version --- .github/workflows/build-wheel.yml | 10 ++++++ .github/workflows/test-wheel-linux.yml | 13 +++++++ .gitignore | 3 ++ ci/tools/env-vars | 1 + cuda_core/tests/test_module.py | 49 ++++++++++++++++++++++++++ 5 files changed, 76 insertions(+) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index e289f3da697..0d6ca87df1d 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -456,6 +456,16 @@ jobs: path: ${{ env.CUDA_CORE_CYTHON_TESTS_DIR }}/test_*${{ env.PY_EXT_SUFFIX }} if-no-files-found: error + - name: Build cuda.core test binaries + run: bash ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/build_test_binaries.sh + + - name: Upload cuda.core test binaries + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries + path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.o + if-no-files-found: error + # Note: This overwrites CUDA_PATH etc - name: Set up mini CTK uses: ./.github/actions/fetch_ctk diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index cb5dbc4c866..72ee298f77a 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -279,6 +279,19 @@ jobs: pwd ls -lahR $CUDA_CORE_CYTHON_TESTS_DIR + - name: Download cuda.core test binaries + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries + path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Display structure of downloaded cuda.core test binaries + run: | + pwd + ls -lahR $CUDA_CORE_TEST_BINARIES_DIR + - name: Set up Python ${{ 
matrix.PY_VER }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: diff --git a/.gitignore b/.gitignore index c55480b7551..d2a5bf6e52e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,9 @@ cache_driver cache_runtime cache_nvrtc +# cuda.core test object fixtures built locally / downloaded as CI artifacts +cuda_core/tests/test_binaries/*.o + # CUDA Python specific (auto-generated) cuda_bindings/cuda/bindings/_bindings/cyruntime.pxd cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx diff --git a/ci/tools/env-vars b/ci/tools/env-vars index 30fac1cdce8..8ffbfa13472 100755 --- a/ci/tools/env-vars +++ b/ci/tools/env-vars @@ -34,6 +34,7 @@ CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${HOST_ echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${SHA}" echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "${REPO_DIR}/cuda_core/dist")" echo "CUDA_CORE_CYTHON_TESTS_DIR=$(realpath "${REPO_DIR}/cuda_core/tests/cython")" + echo "CUDA_CORE_TEST_BINARIES_DIR=$(realpath "${REPO_DIR}/cuda_core/tests/test_binaries")" echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" } >> $GITHUB_ENV diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 3a438f825a0..ebeb0775808 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -172,6 +172,35 @@ def get_saxpy_fatbin(init_cuda): return bytes(fatbin), sym_map +@pytest.fixture(scope="module") +def get_saxpy_object(): + """Read the pre-built saxpy.o. + + In CI: built by the build stage and downloaded as a CI artifact. + In local dev: auto-built on demand if nvcc is available; if you edit + saxpy.cu, remove the stale saxpy.o to force a rebuild. 
+ """ + import shutil + import subprocess + from pathlib import Path + + binaries_dir = Path(__file__).parent / "test_binaries" + obj_path = binaries_dir / "saxpy.o" + + if not obj_path.is_file(): + if shutil.which("nvcc") is None: + pytest.skip( + f"saxpy.o not found at {obj_path} and nvcc is unavailable. " + "In CI this is downloaded from the build stage." + ) + subprocess.run( # noqa: S603 + ["bash", str(binaries_dir / "build_test_binaries.sh")], # noqa: S607 + check=True, + ) + + return obj_path.read_bytes() + + def test_get_kernel(init_cuda): kernel = """extern "C" __global__ void ABC() { }""" @@ -330,6 +359,26 @@ def test_object_code_load_fatbin_from_file(get_saxpy_fatbin, tmp_path, convert_p mod_obj.get_kernel("saxpy") # force loading +def test_object_code_load_object(get_saxpy_object): + obj = get_saxpy_object + assert isinstance(obj, bytes) + mod_obj = ObjectCode.from_object(obj) + assert mod_obj.code == obj + assert mod_obj.code_type == "object" + # object code is only valid as linker input; get_kernel is unsupported + with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'): + mod_obj.get_kernel("saxpy") + + +def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): + obj_file = tmp_path / "test.o" + obj_file.write_bytes(get_saxpy_object) + arg = str(obj_file) + mod_obj = ObjectCode.from_object(arg) + assert mod_obj.code == arg + assert mod_obj.code_type == "object" + + def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel_cubin From 7244aeed5ef47922a527b2b6dcf57806be3ae441 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Wed, 10 Jun 2026 18:34:58 +0000 Subject: [PATCH 02/13] add test_binaries folder --- .../tests/test_binaries/build_test_binaries.sh | 14 ++++++++++++++ cuda_core/tests/test_binaries/saxpy.cu | 12 ++++++++++++ 2 files changed, 26 insertions(+) create mode 100755 cuda_core/tests/test_binaries/build_test_binaries.sh create mode 100644 
cuda_core/tests/test_binaries/saxpy.cu diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh new file mode 100755 index 00000000000..6e4de10b261 --- /dev/null +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +# Build .o test fixtures. Invoked at CI build stage + +SCRIPTPATH=$(dirname "$(realpath "$0")") + +nvcc -dc -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" + +ls -lah "${SCRIPTPATH}/saxpy.o" diff --git a/cuda_core/tests/test_binaries/saxpy.cu b/cuda_core/tests/test_binaries/saxpy.cu new file mode 100644 index 00000000000..6ac3fa714d1 --- /dev/null +++ b/cuda_core/tests/test_binaries/saxpy.cu @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#include + +template +__global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) { + const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (size_t i = tid; i < N; i += gridDim.x * blockDim.x) { + out[i] = a * x[i] + y[i]; + } +} From 2c4b18ca94a536600931993af04939a3f6e32b58 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Thu, 11 Jun 2026 16:30:04 +0000 Subject: [PATCH 03/13] add download test binaries in test-wheel-windows.yaml --- .github/workflows/test-wheel-windows.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 6ccacfff8f1..6db50b89da7 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -259,6 +259,19 @@ jobs: Get-Location Get-ChildItem -Recurse -Force $env:CUDA_CORE_CYTHON_TESTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName + - name: 
Download cuda.core test binaries + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries + path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Display structure of downloaded cuda.core test binaries + run: | + Get-Location + Get-ChildItem -Recurse -Force $env:CUDA_CORE_TEST_BINARIES_DIR | Select-Object Mode, LastWriteTime, Length, FullName + - name: Set up Python ${{ matrix.PY_VER }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: From dab5d51156166ee12022ce8ea7afe4e91af068b5 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Fri, 12 Jun 2026 18:06:48 +0000 Subject: [PATCH 04/13] use device cuda/std/cstddef --- cuda_core/tests/test_binaries/saxpy.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_binaries/saxpy.cu b/cuda_core/tests/test_binaries/saxpy.cu index 6ac3fa714d1..9e380186635 100644 --- a/cuda_core/tests/test_binaries/saxpy.cu +++ b/cuda_core/tests/test_binaries/saxpy.cu @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -#include +#include template __global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) { From 6c836315132374487a9116b60724cd7ca17dd25a Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Fri, 12 Jun 2026 18:16:27 +0000 Subject: [PATCH 05/13] replace shutil with find_nvidia_binary_utility(nvcc) --- cuda_core/tests/test_module.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index ebeb0775808..f2a663238ff 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -180,15 +180,16 @@ def get_saxpy_object(): In local dev: auto-built on demand if nvcc is available; if you edit saxpy.cu, remove the stale saxpy.o to force a rebuild. """ - import shutil import subprocess from pathlib import Path + from cuda.pathfinder import find_nvidia_binary_utility + binaries_dir = Path(__file__).parent / "test_binaries" obj_path = binaries_dir / "saxpy.o" if not obj_path.is_file(): - if shutil.which("nvcc") is None: + if find_nvidia_binary_utility("nvcc") is None: pytest.skip( f"saxpy.o not found at {obj_path} and nvcc is unavailable. " "In CI this is downloaded from the build stage." 
From 673a61dba9e2b58c71289375cb18629a521fe386 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Fri, 12 Jun 2026 22:17:02 +0000 Subject: [PATCH 06/13] add testing with linker --- cuda_core/tests/test_binaries/saxpy.cu | 4 +++ cuda_core/tests/test_module.py | 45 ++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_binaries/saxpy.cu b/cuda_core/tests/test_binaries/saxpy.cu index 9e380186635..f0afc58637c 100644 --- a/cuda_core/tests/test_binaries/saxpy.cu +++ b/cuda_core/tests/test_binaries/saxpy.cu @@ -3,6 +3,10 @@ #include +__device__ float saxpy_step(float a, float x, float y) { + return a * x + y; +} + template __global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) { const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index f2a663238ff..f82777a95f4 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -8,7 +8,7 @@ import pytest import cuda.core -from cuda.core import Device, Kernel, ObjectCode, Program, ProgramOptions +from cuda.core import Device, Kernel, Linker, LinkerOptions, ObjectCode, Program, ProgramOptions from cuda.core._program import _can_load_generated_ptx from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return from cuda.core._utils.version import binding_version, driver_version @@ -366,7 +366,6 @@ def test_object_code_load_object(get_saxpy_object): mod_obj = ObjectCode.from_object(obj) assert mod_obj.code == obj assert mod_obj.code_type == "object" - # object code is only valid as linker input; get_kernel is unsupported with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'): mod_obj.get_kernel("saxpy") @@ -380,6 +379,48 @@ def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): assert mod_obj.code_type == "object" +def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): + arch = "sm_" + 
"".join(f"{i}" for i in init_cuda.compute_capability) + kernel_ptx = Program( + r""" + extern __device__ float saxpy_step(float a, float x, float y); + extern "C" __global__ void linked_kernel(float a, float x, float y, float* out) { + if (threadIdx.x == 0 && blockIdx.x == 0) *out = saxpy_step(a, x, y); + } + """, + "c++", + ProgramOptions(relocatable_device_code=True, arch=arch), + ).compile("ptx") + linked = Linker( + kernel_ptx, + ObjectCode.from_object(get_saxpy_object), + options=LinkerOptions(arch=arch), + ).link("cubin") + kernel = linked.get_kernel("linked_kernel") + + import numpy as np + + stream = init_cuda.create_stream() + host_buf = cuda.core.LegacyPinnedMemoryResource().allocate(4) + result = np.from_dlpack(host_buf).view(np.float32) + result[:] = 0.0 + dev_buf = init_cuda.memory_resource.allocate(4, stream=stream) + + cuda.core.launch( + stream, + cuda.core.LaunchConfig(grid=1, block=1), + kernel, + np.float32(2.0), + np.float32(3.0), + np.float32(4.0), + dev_buf, + ) + dev_buf.copy_to(host_buf, stream=stream) + stream.sync() + + assert result[0] == 10.0 + + def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel_cubin From 61d0e37927711ff35b3fee2751632983b998706e Mon Sep 17 00:00:00 2001 From: Jinfeng Li Date: Mon, 15 Jun 2026 13:28:47 -0400 Subject: [PATCH 07/13] Update cuda_core/tests/test_module.py Co-authored-by: Leo Fang --- cuda_core/tests/test_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index f82777a95f4..bdf90061e89 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -380,7 +380,7 @@ def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): - arch = "sm_" + "".join(f"{i}" for i in init_cuda.compute_capability) + arch = init_cuda.arch kernel_ptx = Program( r""" 
extern __device__ float saxpy_step(float a, float x, float y); From 4c1a5ad5366fd77b1f95299cb67f8ef97b5daabe Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 17:25:26 +0000 Subject: [PATCH 08/13] add windows nvcc flag required by CCCL cuda/std/cstddef --- cuda_core/tests/test_binaries/build_test_binaries.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh index 6e4de10b261..33014ead170 100755 --- a/cuda_core/tests/test_binaries/build_test_binaries.sh +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -9,6 +9,12 @@ set -euo pipefail SCRIPTPATH=$(dirname "$(realpath "$0")") -nvcc -dc -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" +NVCC_EXTRA_FLAGS=() +if [[ "${OS:-}" == "Windows_NT" ]]; then + # CCCL headers (e.g. cuda/std/cstddef) require MSVC's conforming preprocessor. + NVCC_EXTRA_FLAGS+=(-Xcompiler /Zc:preprocessor) +fi + +nvcc -dc "${NVCC_EXTRA_FLAGS[@]}" -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" ls -lah "${SCRIPTPATH}/saxpy.o" From c9edf2b0e504cb8766395a45c6b8de58d84df00a Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 17:29:05 +0000 Subject: [PATCH 09/13] addressing comment --- cuda_core/tests/test_module.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index bdf90061e89..3bb73057288 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -180,6 +180,7 @@ def get_saxpy_object(): In local dev: auto-built on demand if nvcc is available; if you edit saxpy.cu, remove the stale saxpy.o to force a rebuild. 
""" + import os import subprocess from pathlib import Path @@ -197,6 +198,7 @@ def get_saxpy_object(): subprocess.run( # noqa: S603 ["bash", str(binaries_dir / "build_test_binaries.sh")], # noqa: S607 check=True, + env=os.environ, ) return obj_path.read_bytes() From ce8a8123980c2f0ee1b19b1564fd6988973318f6 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 17:40:12 +0000 Subject: [PATCH 10/13] move imports to top --- cuda_core/tests/test_module.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 3bb73057288..d5b22afc88a 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -2,9 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 import ctypes +import os import pickle +import subprocess import warnings +from pathlib import Path +import numpy as np import pytest import cuda.core @@ -12,6 +16,7 @@ from cuda.core._program import _can_load_generated_ptx from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return from cuda.core._utils.version import binding_version, driver_version +from cuda.pathfinder import find_nvidia_binary_utility try: import numba @@ -180,12 +185,6 @@ def get_saxpy_object(): In local dev: auto-built on demand if nvcc is available; if you edit saxpy.cu, remove the stale saxpy.o to force a rebuild. 
""" - import os - import subprocess - from pathlib import Path - - from cuda.pathfinder import find_nvidia_binary_utility - binaries_dir = Path(__file__).parent / "test_binaries" obj_path = binaries_dir / "saxpy.o" @@ -400,8 +399,6 @@ def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): ).link("cubin") kernel = linked.get_kernel("linked_kernel") - import numpy as np - stream = init_cuda.create_stream() host_buf = cuda.core.LegacyPinnedMemoryResource().allocate(4) result = np.from_dlpack(host_buf).view(np.float32) From 4575865c283593aceff465e5d26318f8067057a9 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 17:58:36 +0000 Subject: [PATCH 11/13] add c++17 as required by cccl windows --- cuda_core/tests/test_binaries/build_test_binaries.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh index 33014ead170..00299f76724 100755 --- a/cuda_core/tests/test_binaries/build_test_binaries.sh +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -9,9 +9,8 @@ set -euo pipefail SCRIPTPATH=$(dirname "$(realpath "$0")") -NVCC_EXTRA_FLAGS=() +NVCC_EXTRA_FLAGS=(-std=c++17) if [[ "${OS:-}" == "Windows_NT" ]]; then - # CCCL headers (e.g. cuda/std/cstddef) require MSVC's conforming preprocessor. 
NVCC_EXTRA_FLAGS+=(-Xcompiler /Zc:preprocessor) fi From e2ad6c1c7a00804a8659789181ffd62c9b372876 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 18:45:31 +0000 Subject: [PATCH 12/13] fix test failure due to using init_cuda.arch instead of sm_{init_cuda.arch} --- cuda_core/tests/test_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index d5b22afc88a..e5b3d785899 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -381,7 +381,7 @@ def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): - arch = init_cuda.arch + arch = f"sm_{init_cuda.arch}" kernel_ptx = Program( r""" extern __device__ float saxpy_step(float a, float x, float y); From c224dfcb6cae37a390eb84eb5b4e9fc0b12bde7e Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 21:09:04 +0000 Subject: [PATCH 13/13] avoid linking kernel_ptx with cubin that triggers nvrtc nvjitlink version mismatch in ci blackwell gpu --- cuda_core/tests/test_module.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index e5b3d785899..524dd471345 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -382,7 +382,7 @@ def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): arch = f"sm_{init_cuda.arch}" - kernel_ptx = Program( + kernel_code = Program( r""" extern __device__ float saxpy_step(float a, float x, float y); extern "C" __global__ void linked_kernel(float a, float x, float y, float* out) { @@ -391,9 +391,9 @@ def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): """, "c++", ProgramOptions(relocatable_device_code=True, arch=arch), - ).compile("ptx") + ).compile("cubin") 
linked = Linker( - kernel_ptx, + kernel_code, ObjectCode.from_object(get_saxpy_object), options=LinkerOptions(arch=arch), ).link("cubin")