From 569b46d1e7cd66d9f41326618bb31ae5599b8cd0 Mon Sep 17 00:00:00 2001 From: Prateek Chawla Date: Mon, 23 Mar 2026 09:20:04 +0100 Subject: [PATCH 1/9] Updates to work with CUDA 13.x Signed-off-by: Prateek Chawla --- 03-H_Multi_GPU_Parallelization/.master/Makefile.in | 4 ++-- 03-H_Multi_GPU_Parallelization/.master/jacobi.cu | 2 +- 03-H_Multi_GPU_Parallelization/solutions/Makefile | 4 ++-- 03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile | 4 ++-- 03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu | 2 +- 03-H_Multi_GPU_Parallelization/solutions/jacobi.cu | 2 +- 03-H_Multi_GPU_Parallelization/tasks/Makefile | 4 ++-- 03-H_Multi_GPU_Parallelization/tasks/jacobi.cu | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/03-H_Multi_GPU_Parallelization/.master/Makefile.in b/03-H_Multi_GPU_Parallelization/.master/Makefile.in index e15d85c..1ef54aa 100644 --- a/03-H_Multi_GPU_Parallelization/.master/Makefile.in +++ b/03-H_Multi_GPU_Parallelization/.master/Makefile.in @@ -24,8 +24,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/03-H_Multi_GPU_Parallelization/.master/jacobi.cu b/03-H_Multi_GPU_Parallelization/.master/jacobi.cu index a9700cc..2d913d9 100644 --- a/03-H_Multi_GPU_Parallelization/.master/jacobi.cu +++ b/03-H_Multi_GPU_Parallelization/.master/jacobi.cu @@ -59,7 +59,7 @@ #include #ifdef USE_NVTX 
-#include <nvToolsExt.h> +#include <nvtx3/nvToolsExt.h> const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/03-H_Multi_GPU_Parallelization/solutions/Makefile b/03-H_Multi_GPU_Parallelization/solutions/Makefile index 92f033c..96c6077 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/Makefile +++ b/03-H_Multi_GPU_Parallelization/solutions/Makefile @@ -24,8 +24,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile index a6399eb..25849b3 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile +++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile @@ -24,8 +24,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o 
jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu b/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu index ee32ce5..ed6a7ff 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu +++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu @@ -59,7 +59,7 @@ #include #ifdef USE_NVTX -#include <nvToolsExt.h> +#include <nvtx3/nvToolsExt.h> const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu b/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu index e971307..c4b542a 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu +++ b/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu @@ -59,7 +59,7 @@ #include #ifdef USE_NVTX -#include <nvToolsExt.h> +#include <nvtx3/nvToolsExt.h> const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/03-H_Multi_GPU_Parallelization/tasks/Makefile b/03-H_Multi_GPU_Parallelization/tasks/Makefile index d293686..67aae3c 100644 --- a/03-H_Multi_GPU_Parallelization/tasks/Makefile +++ b/03-H_Multi_GPU_Parallelization/tasks/Makefile @@ -24,8 +24,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu 
b/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu index acae736..150886a 100644 --- a/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu +++ b/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu @@ -59,7 +59,7 @@ #include #ifdef USE_NVTX -#include <nvToolsExt.h> +#include <nvtx3/nvToolsExt.h> const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; From 85d7e901819b814a447cb020c9f4c699d1798e2f Mon Sep 17 00:00:00 2001 From: Prateek Chawla Date: Tue, 24 Mar 2026 12:06:49 +0100 Subject: [PATCH 2/9] update to c++17 Signed-off-by: Prateek Chawla --- 03-H_Multi_GPU_Parallelization/.master/Makefile.in | 2 +- 03-H_Multi_GPU_Parallelization/solutions/Makefile | 2 +- 03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile | 2 +- 03-H_Multi_GPU_Parallelization/tasks/Makefile | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/03-H_Multi_GPU_Parallelization/.master/Makefile.in b/03-H_Multi_GPU_Parallelization/.master/Makefile.in index 1ef54aa..72429af 100644 --- a/03-H_Multi_GPU_Parallelization/.master/Makefile.in +++ b/03-H_Multi_GPU_Parallelization/.master/Makefile.in @@ -24,7 +24,7 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o diff --git a/03-H_Multi_GPU_Parallelization/solutions/Makefile b/03-H_Multi_GPU_Parallelization/solutions/Makefile index 96c6077..6d3af79 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/Makefile +++ b/03-H_Multi_GPU_Parallelization/solutions/Makefile @@ -24,7 +24,7 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp 
-lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile index 25849b3..5f96c74 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile +++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile @@ -24,7 +24,7 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o diff --git a/03-H_Multi_GPU_Parallelization/tasks/Makefile b/03-H_Multi_GPU_Parallelization/tasks/Makefile index 67aae3c..883cd2c 100644 --- a/03-H_Multi_GPU_Parallelization/tasks/Makefile +++ b/03-H_Multi_GPU_Parallelization/tasks/Makefile @@ -24,7 +24,7 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o From 
61355845e0546eee58239ffd39ab258ee9b903d0 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 12:34:13 +0100 Subject: [PATCH 3/9] And Action --- .github/workflows/ci.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..87543f3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,23 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential openmpi-bin libopenmpi-dev + - name: Build first example + working-directory: 03-H_Multi_GPU_Parallelization/solutions + env: + MPI_HOME: /usr + run: | + make jacobi From 47a825ea25884c216c8bd87cc1917b3715e5dee6 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 13:27:08 +0100 Subject: [PATCH 4/9] Add CUDA container image --- .github/workflows/ci.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 87543f3..fe6d382 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,20 +1,19 @@ name: CI - on: push: branches: [ main ] pull_request: branches: [ main ] - jobs: build: runs-on: ubuntu-latest + container: + image: nvidia/cuda:13.2.0-devel-ubuntu24.04 steps: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt-get update - sudo apt-get install -y build-essential openmpi-bin libopenmpi-dev + apt-get update && apt-get install -y build-essential openmpi-bin libopenmpi-dev - name: Build first example working-directory: 03-H_Multi_GPU_Parallelization/solutions env: From 808cde8e0c668b1d0c640dc7c6305b1f61f55559 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 13:32:52 +0100 Subject: [PATCH 5/9] fix 
OpenMPI path --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe6d382..4bc44d1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,6 +17,6 @@ jobs: - name: Build first example working-directory: 03-H_Multi_GPU_Parallelization/solutions env: - MPI_HOME: /usr - run: | + MPI_HOME: /usr/lib/x86_64-linux-gnu/openmpi + run: | make jacobi From 2574a62694ffa6e2a5c392acb33a99a98ece6562 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 13:43:15 +0100 Subject: [PATCH 6/9] Fix vim auto-linting --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4bc44d1..803e40a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,5 +18,5 @@ jobs: working-directory: 03-H_Multi_GPU_Parallelization/solutions env: MPI_HOME: /usr/lib/x86_64-linux-gnu/openmpi - run: | + run: | make jacobi From 7475b071952d47982f6b8686ea2bbcf7d6ee7811 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 14:41:10 +0100 Subject: [PATCH 7/9] Expand coverage --- .github/workflows/ci.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 803e40a..8499b70 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,15 @@ on: branches: [ main ] jobs: build: + strategy: + matrix: + exercise: + - 03-H_Multi_GPU_Parallelization/solutions + - 06-H_Overlap_Communication_and_Computation_MPI/solutions + - 08-H_NCCL_NVSHMEM/solutions/NCCL + - 08-H_NCCL_NVSHMEM/solutions/NVSHMEM + - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/ + - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/ runs-on: ubuntu-latest container: image: 
nvidia/cuda:13.2.0-devel-ubuntu24.04 @@ -15,7 +24,7 @@ jobs: run: | apt-get update && apt-get install -y build-essential openmpi-bin libopenmpi-dev - name: Build first example - working-directory: 03-H_Multi_GPU_Parallelization/solutions + working-directory: ${{ matrix.exercise }} env: MPI_HOME: /usr/lib/x86_64-linux-gnu/openmpi run: | From 1e5143846c9ccf432a8fa1de206fc8a43bb91bbf Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 14:47:43 +0100 Subject: [PATCH 8/9] Small correction --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8499b70..aa6374e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,8 +13,8 @@ jobs: - 06-H_Overlap_Communication_and_Computation_MPI/solutions - 08-H_NCCL_NVSHMEM/solutions/NCCL - 08-H_NCCL_NVSHMEM/solutions/NVSHMEM - - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/ - - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/ + - 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM + - 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs runs-on: ubuntu-latest container: image: nvidia/cuda:13.2.0-devel-ubuntu24.04 From bc076ffd230f82c40204652caec600012080353c Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 14:55:00 +0100 Subject: [PATCH 9/9] Fail slow --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aa6374e..84b3a00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,7 @@ on: jobs: build: strategy: + fail-fast: false matrix: exercise: - 03-H_Multi_GPU_Parallelization/solutions